Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs updates from Chris Mason:
 "This includes a fairly large change from Josef around data writeback
  completion.  Before, the writeback wasn't completed until the metadata
  insertions for the extent were done, and this made for fairly large
  latency spikes on the last page of each ordered extent.

  We already had a separate mechanism for tracking pending metadata
  insertions, so Josef just needed to tweak things a little to end
  writeback earlier on the page.  Overall it makes us much friendlier to
  memory reclaim and lowers latencies quite a lot for synchronous IO.

  Jan Schmidt has finished some background work required to track btree
  blocks as they go through changes in ownership.  It's the missing
  piece he needed for both btrfs send/receive and subvolume quotas.
  Neither of those is ready yet, but the new tracking code is included
  here.  Most of the time, the new code is off.  It is only used by
  scrub and other backref walkers.

  Stefan Behrens has added io failure tracking.  This includes counters
  for which drives are causing the most trouble so the admin (or an
  automated tool) can choose to kick them out.  We're tracking IO
  errors, crc errors, and generation checks we do on each metadata
  block.

  RAID5/6 did miss the cut this time because I'm having trouble with
  corruptions.  I'll nail it down next week and post it for beta testing
  before 3.6"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (58 commits)
  Btrfs: fix tree mod log rewinded level and rewinding of moved keys
  Btrfs: fix tree mod log del_ptr
  Btrfs: add tree_mod_dont_log helper
  Btrfs: add missing spin_lock for insertion into tree mod log
  Btrfs: add inodes before dropping the extent lock in find_all_leafs
  Btrfs: use delayed ref sequence numbers for all fs-tree updates
  Btrfs: fix false positive in check-integrity on unmount
  Btrfs: fix runtime warning in check-integrity check data mode
  Btrfs: set ioprio of scrub readahead to idle
  Btrfs: fix return code in drop_objectid_items
  Btrfs: check to see if the inode is in the log before fsyncing
  Btrfs: return value of btrfs_read_buffer is checked correctly
  Btrfs: read device stats on mount, write modified ones during commit
  Btrfs: add ioctl to get and reset the device stats
  Btrfs: add device counters for detected IO and checksum errors
  btrfs: Drop unused function btrfs_abort_devices()
  Btrfs: fix the same inode id problem when doing auto defragment
  Btrfs: fall back to non-inline if we don't have enough space
  Btrfs: fix how we deal with the orphan block rsv
  Btrfs: convert the inode bit field to use the actual bit operations
  ...
This commit is contained in:
Linus Torvalds 2012-06-01 08:37:31 -07:00
commit 51eab603f5
33 changed files with 2850 additions and 867 deletions

View file

@ -227,7 +227,11 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
if (ret > 0) {
/* we need an acl */
ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
} else {
cache_no_acl(inode);
}
} else {
cache_no_acl(inode);
}
failed:
posix_acl_release(acl);

View file

@ -24,22 +24,135 @@
#include "delayed-ref.h"
#include "locking.h"
/*
 * One (inode number, file offset) pair referring to a data extent.
 * Elements form a singly linked list; check_extent_in_eb() pushes new
 * elements onto the front of such a list.
 */
struct extent_inode_elem {
/* objectid of the key of the referencing file extent item */
u64 inum;
/* key offset plus the distance of the wanted position into the item's range */
u64 offset;
/* next element in the list, NULL at the end */
struct extent_inode_elem *next;
};
/*
 * Test whether @extent_item_pos falls inside the range
 * [offset, offset + num_bytes) read from the file extent item @fi in @eb.
 * If it does, allocate a new element holding the inode number taken from
 * @key and the corresponding file offset, and push it onto the front of the
 * list at @eie.
 *
 * Returns 0 if an element was added, 1 if @extent_item_pos is outside the
 * range of @fi, and -ENOMEM if the element could not be allocated.
 */
static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb,
			      struct btrfs_file_extent_item *fi,
			      u64 extent_item_pos,
			      struct extent_inode_elem **eie)
{
	struct extent_inode_elem *elem;
	u64 range_start = btrfs_file_extent_offset(eb, fi);
	u64 range_len = btrfs_file_extent_num_bytes(eb, fi);

	if (!(extent_item_pos >= range_start &&
	      extent_item_pos < range_start + range_len))
		return 1;

	elem = kmalloc(sizeof(*elem), GFP_NOFS);
	if (elem == NULL)
		return -ENOMEM;

	elem->inum = key->objectid;
	elem->offset = key->offset + (extent_item_pos - range_start);

	/* link in front of whatever the caller has collected so far */
	elem->next = *eie;
	*eie = elem;

	return 0;
}
/*
 * Walk all items of @eb and pass every non-inline EXTENT_DATA item whose
 * disk bytenr equals @wanted_disk_byte to check_extent_in_eb(), which
 * collects the matching (inode, offset) pairs in @eie.
 *
 * From a shared data ref we only know the leaf, not the key, so every item
 * in the leaf has to be inspected to find the one(s) referencing our extent.
 *
 * Returns 0 on success or the negative error returned by
 * check_extent_in_eb().
 */
static int find_extent_in_eb(struct extent_buffer *eb, u64 wanted_disk_byte,
			     u64 extent_item_pos,
			     struct extent_inode_elem **eie)
{
	struct btrfs_file_extent_item *item;
	struct btrfs_key item_key;
	int total = btrfs_header_nritems(eb);
	int i;

	for (i = 0; i < total; i++) {
		int err;

		btrfs_item_key_to_cpu(eb, &item_key, i);
		if (item_key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);

		/* only inline extents are skipped; PREALLOC can be handled */
		if (btrfs_file_extent_type(eb, item) == BTRFS_FILE_EXTENT_INLINE)
			continue;

		if (btrfs_file_extent_disk_bytenr(eb, item) != wanted_disk_byte)
			continue;

		err = check_extent_in_eb(&item_key, eb, item,
					 extent_item_pos, eie);
		if (err < 0)
			return err;
	}

	return 0;
}
/*
* this structure records all encountered refs on the way up to the root
*/
struct __prelim_ref {
/* links the ref into a list of preliminary refs */
struct list_head list;
/* id of the root the ref is attributed to; compared when merging refs */
u64 root_id;
/* copy of the key handed to __add_prelim_ref(), zeroed if none was given */
struct btrfs_key key;
/*
 * key used for the tree search when resolving an indirect ref; a zero
 * type means it is still unknown and __add_missing_keys() fills it in
 */
struct btrfs_key key_for_search;
/* NOTE(review): presumably the tree level of the parent block -- confirm */
int level;
/* reference count; summed up on merge, refs with count 0 are not resolved */
int count;
/* inodes/offsets found for this ref, taken from the parents ulist aux */
struct extent_inode_elem *inode_list;
/* logical address of the parent block, 0 while unresolved */
u64 parent;
/* logical address of the block or extent whose parents are looked up */
u64 wanted_disk_byte;
};
/*
* the rules for all callers of this function are:
* - obtaining the parent is the goal
* - if you add a key, you must know that it is a correct key
* - if you cannot add the parent or a correct key, then we will look into the
* block later to set a correct key
*
* delayed refs
* ============
* backref type | shared | indirect | shared | indirect
* information | tree | tree | data | data
* --------------------+--------+----------+--------+----------
* parent logical | y | - | - | -
* key to resolve | - | y | y | y
* tree block logical | - | - | - | -
* root for resolving | y | y | y | y
*
* - column 1: we've the parent -> done
* - column 2, 3, 4: we use the key to find the parent
*
* on disk refs (inline or keyed)
* ==============================
* backref type | shared | indirect | shared | indirect
* information | tree | tree | data | data
* --------------------+--------+----------+--------+----------
* parent logical | y | - | y | -
* key to resolve | - | - | - | y
* tree block logical | y | y | y | y
* root for resolving | - | y | y | y
*
* - column 1, 3: we've the parent -> done
* - column 2: we take the first key from the block to find the parent
* (see __add_missing_keys)
* - column 4: we use the key to find the parent
*
* additional information that's available but not required to find the parent
* block might help in merging entries to gain some speed.
*/
static int __add_prelim_ref(struct list_head *head, u64 root_id,
struct btrfs_key *key, int level, u64 parent,
u64 wanted_disk_byte, int count)
struct btrfs_key *key, int level,
u64 parent, u64 wanted_disk_byte, int count)
{
struct __prelim_ref *ref;
@ -50,10 +163,11 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
ref->root_id = root_id;
if (key)
ref->key = *key;
ref->key_for_search = *key;
else
memset(&ref->key, 0, sizeof(ref->key));
memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));
ref->inode_list = NULL;
ref->level = level;
ref->count = count;
ref->parent = parent;
@ -64,18 +178,26 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
}
static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
struct ulist *parents,
struct extent_buffer *eb, int level,
u64 wanted_objectid, u64 wanted_disk_byte)
struct ulist *parents, int level,
struct btrfs_key *key, u64 wanted_disk_byte,
const u64 *extent_item_pos)
{
int ret;
int slot;
int slot = path->slots[level];
struct extent_buffer *eb = path->nodes[level];
struct btrfs_file_extent_item *fi;
struct btrfs_key key;
struct extent_inode_elem *eie = NULL;
u64 disk_byte;
u64 wanted_objectid = key->objectid;
add_parent:
ret = ulist_add(parents, eb->start, 0, GFP_NOFS);
if (level == 0 && extent_item_pos) {
fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
ret = check_extent_in_eb(key, eb, fi, *extent_item_pos, &eie);
if (ret < 0)
return ret;
}
ret = ulist_add(parents, eb->start, (unsigned long)eie, GFP_NOFS);
if (ret < 0)
return ret;
@ -89,6 +211,7 @@ add_parent:
* repeat this until we don't find any additional EXTENT_DATA items.
*/
while (1) {
eie = NULL;
ret = btrfs_next_leaf(root, path);
if (ret < 0)
return ret;
@ -97,9 +220,9 @@ add_parent:
eb = path->nodes[0];
for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) {
btrfs_item_key_to_cpu(eb, &key, slot);
if (key.objectid != wanted_objectid ||
key.type != BTRFS_EXTENT_DATA_KEY)
btrfs_item_key_to_cpu(eb, key, slot);
if (key->objectid != wanted_objectid ||
key->type != BTRFS_EXTENT_DATA_KEY)
return 0;
fi = btrfs_item_ptr(eb, slot,
struct btrfs_file_extent_item);
@ -118,8 +241,10 @@ add_parent:
*/
static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
int search_commit_root,
u64 time_seq,
struct __prelim_ref *ref,
struct ulist *parents)
struct ulist *parents,
const u64 *extent_item_pos)
{
struct btrfs_path *path;
struct btrfs_root *root;
@ -152,12 +277,13 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
goto out;
path->lowest_level = level;
ret = btrfs_search_slot(NULL, root, &ref->key, path, 0, 0);
ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
pr_debug("search slot in root %llu (level %d, ref count %d) returned "
"%d for key (%llu %u %llu)\n",
(unsigned long long)ref->root_id, level, ref->count, ret,
(unsigned long long)ref->key.objectid, ref->key.type,
(unsigned long long)ref->key.offset);
(unsigned long long)ref->key_for_search.objectid,
ref->key_for_search.type,
(unsigned long long)ref->key_for_search.offset);
if (ret < 0)
goto out;
@ -179,9 +305,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
}
/* the last two parameters will only be used for level == 0 */
ret = add_all_parents(root, path, parents, eb, level, key.objectid,
ref->wanted_disk_byte);
ret = add_all_parents(root, path, parents, level, &key,
ref->wanted_disk_byte, extent_item_pos);
out:
btrfs_free_path(path);
return ret;
@ -191,8 +316,9 @@ out:
* resolve all indirect backrefs from the list
*/
static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
int search_commit_root,
struct list_head *head)
int search_commit_root, u64 time_seq,
struct list_head *head,
const u64 *extent_item_pos)
{
int err;
int ret = 0;
@ -201,6 +327,7 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
struct __prelim_ref *new_ref;
struct ulist *parents;
struct ulist_node *node;
struct ulist_iterator uiter;
parents = ulist_alloc(GFP_NOFS);
if (!parents)
@ -217,7 +344,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
if (ref->count == 0)
continue;
err = __resolve_indirect_ref(fs_info, search_commit_root,
ref, parents);
time_seq, ref, parents,
extent_item_pos);
if (err) {
if (ret == 0)
ret = err;
@ -225,11 +353,14 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
}
/* we put the first parent into the ref at hand */
node = ulist_next(parents, NULL);
ULIST_ITER_INIT(&uiter);
node = ulist_next(parents, &uiter);
ref->parent = node ? node->val : 0;
ref->inode_list =
node ? (struct extent_inode_elem *)node->aux : 0;
/* additional parents require new refs being added here */
while ((node = ulist_next(parents, node))) {
while ((node = ulist_next(parents, &uiter))) {
new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS);
if (!new_ref) {
ret = -ENOMEM;
@ -237,6 +368,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
}
memcpy(new_ref, ref, sizeof(*ref));
new_ref->parent = node->val;
new_ref->inode_list =
(struct extent_inode_elem *)node->aux;
list_add(&new_ref->list, &ref->list);
}
ulist_reinit(parents);
@ -246,10 +379,65 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
return ret;
}
static inline int ref_for_same_block(struct __prelim_ref *ref1,
struct __prelim_ref *ref2)
{
if (ref1->level != ref2->level)
return 0;
if (ref1->root_id != ref2->root_id)
return 0;
if (ref1->key_for_search.type != ref2->key_for_search.type)
return 0;
if (ref1->key_for_search.objectid != ref2->key_for_search.objectid)
return 0;
if (ref1->key_for_search.offset != ref2->key_for_search.offset)
return 0;
if (ref1->parent != ref2->parent)
return 0;
return 1;
}
/*
* read tree blocks and add keys where required.
*/
static int __add_missing_keys(struct btrfs_fs_info *fs_info,
struct list_head *head)
{
struct list_head *pos;
struct extent_buffer *eb;
list_for_each(pos, head) {
struct __prelim_ref *ref;
ref = list_entry(pos, struct __prelim_ref, list);
if (ref->parent)
continue;
if (ref->key_for_search.type)
continue;
BUG_ON(!ref->wanted_disk_byte);
eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
fs_info->tree_root->leafsize, 0);
BUG_ON(!eb);
btrfs_tree_read_lock(eb);
if (btrfs_header_level(eb) == 0)
btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
else
btrfs_node_key_to_cpu(eb, &ref->key_for_search, 0);
btrfs_tree_read_unlock(eb);
free_extent_buffer(eb);
}
return 0;
}
/*
* merge two lists of backrefs and adjust counts accordingly
*
* mode = 1: merge identical keys, if key is set
* FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
* additionally, we could even add a key range for the blocks we
* looked into to merge even more (-> replace unresolved refs by those
* having a parent).
* mode = 2: merge identical parents
*/
static int __merge_refs(struct list_head *head, int mode)
@ -263,20 +451,21 @@ static int __merge_refs(struct list_head *head, int mode)
ref1 = list_entry(pos1, struct __prelim_ref, list);
if (mode == 1 && ref1->key.type == 0)
continue;
for (pos2 = pos1->next, n2 = pos2->next; pos2 != head;
pos2 = n2, n2 = pos2->next) {
struct __prelim_ref *ref2;
struct __prelim_ref *xchg;
ref2 = list_entry(pos2, struct __prelim_ref, list);
if (mode == 1) {
if (memcmp(&ref1->key, &ref2->key,
sizeof(ref1->key)) ||
ref1->level != ref2->level ||
ref1->root_id != ref2->root_id)
if (!ref_for_same_block(ref1, ref2))
continue;
if (!ref1->parent && ref2->parent) {
xchg = ref1;
ref1 = ref2;
ref2 = xchg;
}
ref1->count += ref2->count;
} else {
if (ref1->parent != ref2->parent)
@ -296,16 +485,17 @@ static int __merge_refs(struct list_head *head, int mode)
* smaller or equal that seq to the list
*/
static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
struct btrfs_key *info_key,
struct list_head *prefs)
{
struct btrfs_delayed_extent_op *extent_op = head->extent_op;
struct rb_node *n = &head->node.rb_node;
struct btrfs_key key;
struct btrfs_key op_key = {0};
int sgn;
int ret = 0;
if (extent_op && extent_op->update_key)
btrfs_disk_key_to_cpu(info_key, &extent_op->key);
btrfs_disk_key_to_cpu(&op_key, &extent_op->key);
while ((n = rb_prev(n))) {
struct btrfs_delayed_ref_node *node;
@ -337,7 +527,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
struct btrfs_delayed_tree_ref *ref;
ref = btrfs_delayed_node_to_tree_ref(node);
ret = __add_prelim_ref(prefs, ref->root, info_key,
ret = __add_prelim_ref(prefs, ref->root, &op_key,
ref->level + 1, 0, node->bytenr,
node->ref_mod * sgn);
break;
@ -346,7 +536,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
struct btrfs_delayed_tree_ref *ref;
ref = btrfs_delayed_node_to_tree_ref(node);
ret = __add_prelim_ref(prefs, ref->root, info_key,
ret = __add_prelim_ref(prefs, ref->root, NULL,
ref->level + 1, ref->parent,
node->bytenr,
node->ref_mod * sgn);
@ -354,8 +544,6 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
}
case BTRFS_EXTENT_DATA_REF_KEY: {
struct btrfs_delayed_data_ref *ref;
struct btrfs_key key;
ref = btrfs_delayed_node_to_data_ref(node);
key.objectid = ref->objectid;
@ -368,7 +556,6 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
}
case BTRFS_SHARED_DATA_REF_KEY: {
struct btrfs_delayed_data_ref *ref;
struct btrfs_key key;
ref = btrfs_delayed_node_to_data_ref(node);
@ -394,8 +581,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
*/
static int __add_inline_refs(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, u64 bytenr,
struct btrfs_key *info_key, int *info_level,
struct list_head *prefs)
int *info_level, struct list_head *prefs)
{
int ret = 0;
int slot;
@ -411,7 +597,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
* enumerate all inline refs
*/
leaf = path->nodes[0];
slot = path->slots[0] - 1;
slot = path->slots[0];
item_size = btrfs_item_size_nr(leaf, slot);
BUG_ON(item_size < sizeof(*ei));
@ -424,12 +610,9 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
struct btrfs_tree_block_info *info;
struct btrfs_disk_key disk_key;
info = (struct btrfs_tree_block_info *)ptr;
*info_level = btrfs_tree_block_level(leaf, info);
btrfs_tree_block_key(leaf, info, &disk_key);
btrfs_disk_key_to_cpu(info_key, &disk_key);
ptr += sizeof(struct btrfs_tree_block_info);
BUG_ON(ptr > end);
} else {
@ -447,7 +630,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
switch (type) {
case BTRFS_SHARED_BLOCK_REF_KEY:
ret = __add_prelim_ref(prefs, 0, info_key,
ret = __add_prelim_ref(prefs, 0, NULL,
*info_level + 1, offset,
bytenr, 1);
break;
@ -462,8 +645,9 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
break;
}
case BTRFS_TREE_BLOCK_REF_KEY:
ret = __add_prelim_ref(prefs, offset, info_key,
*info_level + 1, 0, bytenr, 1);
ret = __add_prelim_ref(prefs, offset, NULL,
*info_level + 1, 0,
bytenr, 1);
break;
case BTRFS_EXTENT_DATA_REF_KEY: {
struct btrfs_extent_data_ref *dref;
@ -477,8 +661,8 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = btrfs_extent_data_ref_offset(leaf, dref);
root = btrfs_extent_data_ref_root(leaf, dref);
ret = __add_prelim_ref(prefs, root, &key, 0, 0, bytenr,
count);
ret = __add_prelim_ref(prefs, root, &key, 0, 0,
bytenr, count);
break;
}
default:
@ -496,8 +680,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
*/
static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, u64 bytenr,
struct btrfs_key *info_key, int info_level,
struct list_head *prefs)
int info_level, struct list_head *prefs)
{
struct btrfs_root *extent_root = fs_info->extent_root;
int ret;
@ -527,7 +710,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
switch (key.type) {
case BTRFS_SHARED_BLOCK_REF_KEY:
ret = __add_prelim_ref(prefs, 0, info_key,
ret = __add_prelim_ref(prefs, 0, NULL,
info_level + 1, key.offset,
bytenr, 1);
break;
@ -543,8 +726,9 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
break;
}
case BTRFS_TREE_BLOCK_REF_KEY:
ret = __add_prelim_ref(prefs, key.offset, info_key,
info_level + 1, 0, bytenr, 1);
ret = __add_prelim_ref(prefs, key.offset, NULL,
info_level + 1, 0,
bytenr, 1);
break;
case BTRFS_EXTENT_DATA_REF_KEY: {
struct btrfs_extent_data_ref *dref;
@ -560,7 +744,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
key.offset = btrfs_extent_data_ref_offset(leaf, dref);
root = btrfs_extent_data_ref_root(leaf, dref);
ret = __add_prelim_ref(prefs, root, &key, 0, 0,
bytenr, count);
bytenr, count);
break;
}
default:
@ -582,11 +766,12 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
*/
static int find_parent_nodes(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 seq, struct ulist *refs, struct ulist *roots)
u64 delayed_ref_seq, u64 time_seq,
struct ulist *refs, struct ulist *roots,
const u64 *extent_item_pos)
{
struct btrfs_key key;
struct btrfs_path *path;
struct btrfs_key info_key = { 0 };
struct btrfs_delayed_ref_root *delayed_refs = NULL;
struct btrfs_delayed_ref_head *head;
int info_level = 0;
@ -645,7 +830,7 @@ again:
btrfs_put_delayed_ref(&head->node);
goto again;
}
ret = __add_delayed_refs(head, seq, &info_key,
ret = __add_delayed_refs(head, delayed_ref_seq,
&prefs_delayed);
if (ret) {
spin_unlock(&delayed_refs->lock);
@ -659,16 +844,17 @@ again:
struct extent_buffer *leaf;
int slot;
path->slots[0]--;
leaf = path->nodes[0];
slot = path->slots[0] - 1;
slot = path->slots[0];
btrfs_item_key_to_cpu(leaf, &key, slot);
if (key.objectid == bytenr &&
key.type == BTRFS_EXTENT_ITEM_KEY) {
ret = __add_inline_refs(fs_info, path, bytenr,
&info_key, &info_level, &prefs);
&info_level, &prefs);
if (ret)
goto out;
ret = __add_keyed_refs(fs_info, path, bytenr, &info_key,
ret = __add_keyed_refs(fs_info, path, bytenr,
info_level, &prefs);
if (ret)
goto out;
@ -676,21 +862,18 @@ again:
}
btrfs_release_path(path);
/*
* when adding the delayed refs above, the info_key might not have
* been known yet. Go over the list and replace the missing keys
*/
list_for_each_entry(ref, &prefs_delayed, list) {
if ((ref->key.offset | ref->key.type | ref->key.objectid) == 0)
memcpy(&ref->key, &info_key, sizeof(ref->key));
}
list_splice_init(&prefs_delayed, &prefs);
ret = __add_missing_keys(fs_info, &prefs);
if (ret)
goto out;
ret = __merge_refs(&prefs, 1);
if (ret)
goto out;
ret = __resolve_indirect_refs(fs_info, search_commit_root, &prefs);
ret = __resolve_indirect_refs(fs_info, search_commit_root, time_seq,
&prefs, extent_item_pos);
if (ret)
goto out;
@ -709,7 +892,33 @@ again:
BUG_ON(ret < 0);
}
if (ref->count && ref->parent) {
ret = ulist_add(refs, ref->parent, 0, GFP_NOFS);
struct extent_inode_elem *eie = NULL;
if (extent_item_pos && !ref->inode_list) {
u32 bsz;
struct extent_buffer *eb;
bsz = btrfs_level_size(fs_info->extent_root,
info_level);
eb = read_tree_block(fs_info->extent_root,
ref->parent, bsz, 0);
BUG_ON(!eb);
ret = find_extent_in_eb(eb, bytenr,
*extent_item_pos, &eie);
ref->inode_list = eie;
free_extent_buffer(eb);
}
ret = ulist_add_merge(refs, ref->parent,
(unsigned long)ref->inode_list,
(unsigned long *)&eie, GFP_NOFS);
if (!ret && extent_item_pos) {
/*
* we've recorded that parent, so we must extend
* its inode list here
*/
BUG_ON(!eie);
while (eie->next)
eie = eie->next;
eie->next = ref->inode_list;
}
BUG_ON(ret < 0);
}
kfree(ref);
@ -734,6 +943,28 @@ out:
return ret;
}
static void free_leaf_list(struct ulist *blocks)
{
struct ulist_node *node = NULL;
struct extent_inode_elem *eie;
struct extent_inode_elem *eie_next;
struct ulist_iterator uiter;
ULIST_ITER_INIT(&uiter);
while ((node = ulist_next(blocks, &uiter))) {
if (!node->aux)
continue;
eie = (struct extent_inode_elem *)node->aux;
for (; eie; eie = eie_next) {
eie_next = eie->next;
kfree(eie);
}
node->aux = 0;
}
ulist_free(blocks);
}
/*
* Finds all leafs with a reference to the specified combination of bytenr and
* offset. key_list_head will point to a list of corresponding keys (caller must
@ -744,7 +975,9 @@ out:
*/
static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 num_bytes, u64 seq, struct ulist **leafs)
u64 delayed_ref_seq, u64 time_seq,
struct ulist **leafs,
const u64 *extent_item_pos)
{
struct ulist *tmp;
int ret;
@ -758,11 +991,12 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
return -ENOMEM;
}
ret = find_parent_nodes(trans, fs_info, bytenr, seq, *leafs, tmp);
ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
time_seq, *leafs, tmp, extent_item_pos);
ulist_free(tmp);
if (ret < 0 && ret != -ENOENT) {
ulist_free(*leafs);
free_leaf_list(*leafs);
return ret;
}
@ -784,10 +1018,12 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
*/
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 num_bytes, u64 seq, struct ulist **roots)
u64 delayed_ref_seq, u64 time_seq,
struct ulist **roots)
{
struct ulist *tmp;
struct ulist_node *node = NULL;
struct ulist_iterator uiter;
int ret;
tmp = ulist_alloc(GFP_NOFS);
@ -799,15 +1035,16 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
return -ENOMEM;
}
ULIST_ITER_INIT(&uiter);
while (1) {
ret = find_parent_nodes(trans, fs_info, bytenr, seq,
tmp, *roots);
ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
time_seq, tmp, *roots, NULL);
if (ret < 0 && ret != -ENOENT) {
ulist_free(tmp);
ulist_free(*roots);
return ret;
}
node = ulist_next(tmp, node);
node = ulist_next(tmp, &uiter);
if (!node)
break;
bytenr = node->val;
@ -1093,67 +1330,25 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
return 0;
}
static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, u64 logical,
u64 orig_extent_item_objectid,
u64 extent_item_pos, u64 root,
static int iterate_leaf_refs(struct extent_inode_elem *inode_list,
u64 root, u64 extent_item_objectid,
iterate_extent_inodes_t *iterate, void *ctx)
{
u64 disk_byte;
struct btrfs_key key;
struct btrfs_file_extent_item *fi;
struct extent_buffer *eb;
int slot;
int nritems;
struct extent_inode_elem *eie;
int ret = 0;
int extent_type;
u64 data_offset;
u64 data_len;
eb = read_tree_block(fs_info->tree_root, logical,
fs_info->tree_root->leafsize, 0);
if (!eb)
return -EIO;
/*
* from the shared data ref, we only have the leaf but we need
* the key. thus, we must look into all items and see that we
* find one (some) with a reference to our extent item.
*/
nritems = btrfs_header_nritems(eb);
for (slot = 0; slot < nritems; ++slot) {
btrfs_item_key_to_cpu(eb, &key, slot);
if (key.type != BTRFS_EXTENT_DATA_KEY)
continue;
fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
extent_type = btrfs_file_extent_type(eb, fi);
if (extent_type == BTRFS_FILE_EXTENT_INLINE)
continue;
/* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */
disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
if (disk_byte != orig_extent_item_objectid)
continue;
data_offset = btrfs_file_extent_offset(eb, fi);
data_len = btrfs_file_extent_num_bytes(eb, fi);
if (extent_item_pos < data_offset ||
extent_item_pos >= data_offset + data_len)
continue;
for (eie = inode_list; eie; eie = eie->next) {
pr_debug("ref for %llu resolved, key (%llu EXTEND_DATA %llu), "
"root %llu\n", orig_extent_item_objectid,
key.objectid, key.offset, root);
ret = iterate(key.objectid,
key.offset + (extent_item_pos - data_offset),
root, ctx);
"root %llu\n", extent_item_objectid,
eie->inum, eie->offset, root);
ret = iterate(eie->inum, eie->offset, root, ctx);
if (ret) {
pr_debug("stopping iteration because ret=%d\n", ret);
pr_debug("stopping iteration for %llu due to ret=%d\n",
extent_item_objectid, ret);
break;
}
}
free_extent_buffer(eb);
return ret;
}
@ -1175,7 +1370,10 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
struct ulist *roots = NULL;
struct ulist_node *ref_node = NULL;
struct ulist_node *root_node = NULL;
struct seq_list seq_elem;
struct seq_list seq_elem = {};
struct seq_list tree_mod_seq_elem = {};
struct ulist_iterator ref_uiter;
struct ulist_iterator root_uiter;
struct btrfs_delayed_ref_root *delayed_refs = NULL;
pr_debug("resolving all inodes for extent %llu\n",
@ -1192,34 +1390,41 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
spin_lock(&delayed_refs->lock);
btrfs_get_delayed_seq(delayed_refs, &seq_elem);
spin_unlock(&delayed_refs->lock);
btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
}
ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
extent_item_pos, seq_elem.seq,
&refs);
seq_elem.seq, tree_mod_seq_elem.seq, &refs,
&extent_item_pos);
if (ret)
goto out;
while (!ret && (ref_node = ulist_next(refs, ref_node))) {
ret = btrfs_find_all_roots(trans, fs_info, ref_node->val, -1,
seq_elem.seq, &roots);
ULIST_ITER_INIT(&ref_uiter);
while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
ret = btrfs_find_all_roots(trans, fs_info, ref_node->val,
seq_elem.seq,
tree_mod_seq_elem.seq, &roots);
if (ret)
break;
while (!ret && (root_node = ulist_next(roots, root_node))) {
pr_debug("root %llu references leaf %llu\n",
root_node->val, ref_node->val);
ret = iterate_leaf_refs(fs_info, ref_node->val,
extent_item_objectid,
extent_item_pos, root_node->val,
iterate, ctx);
ULIST_ITER_INIT(&root_uiter);
while (!ret && (root_node = ulist_next(roots, &root_uiter))) {
pr_debug("root %llu references leaf %llu, data list "
"%#lx\n", root_node->val, ref_node->val,
ref_node->aux);
ret = iterate_leaf_refs(
(struct extent_inode_elem *)ref_node->aux,
root_node->val, extent_item_objectid,
iterate, ctx);
}
ulist_free(roots);
roots = NULL;
}
ulist_free(refs);
free_leaf_list(refs);
ulist_free(roots);
out:
if (!search_commit_root) {
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
btrfs_put_delayed_seq(delayed_refs, &seq_elem);
btrfs_end_transaction(trans, fs_info->extent_root);
}

View file

@ -58,7 +58,8 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 num_bytes, u64 seq, struct ulist **roots);
u64 delayed_ref_seq, u64 time_seq,
struct ulist **roots);
struct btrfs_data_container *init_data_container(u32 total_bytes);
struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,

View file

@ -24,6 +24,20 @@
#include "ordered-data.h"
#include "delayed-inode.h"
/*
 * Per-inode flag numbers.
 *
 * NOTE(review): these look like bit positions for the runtime_flags word of
 * struct btrfs_inode, meant for the kernel bit operations -- confirm against
 * the users of these constants.
 */
/*
 * ordered_data_close is set by truncate when a file that used
 * to have good data has been truncated to zero. When it is set
 * the btrfs file release call will add this inode to the
 * ordered operations list so that we make sure to flush out any
 * new data the application may have written before commit.
 */
#define BTRFS_INODE_ORDERED_DATA_CLOSE 0
#define BTRFS_INODE_ORPHAN_META_RESERVED 1
#define BTRFS_INODE_DUMMY 2
#define BTRFS_INODE_IN_DEFRAG 3
#define BTRFS_INODE_DELALLOC_META_RESERVED 4
#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
/* in memory btrfs inode */
struct btrfs_inode {
/* which subvolume this inode belongs to */
@ -57,9 +71,6 @@ struct btrfs_inode {
/* used to order data wrt metadata */
struct btrfs_ordered_inode_tree ordered_tree;
/* for keeping track of orphaned inodes */
struct list_head i_orphan;
/* list of all the delalloc inodes in the FS. There are times we need
* to write all the delalloc pages to disk, and this list is used
* to walk them all.
@ -78,14 +89,13 @@ struct btrfs_inode {
/* the space_info for where this inode's data allocations are done */
struct btrfs_space_info *space_info;
unsigned long runtime_flags;
/* full 64 bit generation number, struct vfs_inode doesn't have a big
* enough field for this.
*/
u64 generation;
/* sequence number for NFS changes */
u64 sequence;
/*
* transid of the trans_handle that last modified this inode
*/
@ -144,23 +154,10 @@ struct btrfs_inode {
unsigned outstanding_extents;
unsigned reserved_extents;
/*
* ordered_data_close is set by truncate when a file that used
* to have good data has been truncated to zero. When it is set
* the btrfs file release call will add this inode to the
* ordered operations list so that we make sure to flush out any
* new data the application may have written before commit.
*/
unsigned ordered_data_close:1;
unsigned orphan_meta_reserved:1;
unsigned dummy_inode:1;
unsigned in_defrag:1;
unsigned delalloc_meta_reserved:1;
/*
* always compress this one file
*/
unsigned force_compress:4;
unsigned force_compress;
struct btrfs_delayed_node *delayed_node;
@ -202,4 +199,17 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
return false;
}
/*
 * Check, under the root's log_mutex, whether the inode's logged_trans equals
 * @generation and its last_sub_trans is not newer than the root's
 * last_log_commit.
 *
 * Returns 1 if both conditions hold, 0 otherwise.
 */
static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
{
	struct btrfs_inode *binode = BTRFS_I(inode);
	struct btrfs_root *root = binode->root;
	int logged;

	mutex_lock(&root->log_mutex);
	logged = binode->logged_trans == generation &&
		 binode->last_sub_trans <= root->last_log_commit;
	mutex_unlock(&root->log_mutex);

	return logged;
}
#endif

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -173,6 +173,9 @@ static int btrfs_csum_sizes[] = { 4, 0 };
#define BTRFS_FT_XATTR 8
#define BTRFS_FT_MAX 9
/* ioprio of readahead is set to idle */
#define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0))
/*
* The key defines the order in the tree, and so it also defines (optimal)
* block layout.
@ -823,6 +826,14 @@ struct btrfs_csum_item {
u8 csum;
} __attribute__ ((__packed__));
/*
 * Packed item carrying the device statistics counters: an array of
 * BTRFS_DEV_STAT_VALUES_MAX values, each declared little-endian 64 bit.
 * Individual slots are accessed through btrfs_dev_stats_value() and
 * btrfs_set_dev_stats_value().
 */
struct btrfs_dev_stats_item {
/*
* grow this item struct at the end for future enhancements and keep
* the existing values unchanged
*/
__le64 values[BTRFS_DEV_STAT_VALUES_MAX];
} __attribute__ ((__packed__));
/* different types of block groups (and chunks) */
#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0)
#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1)
@ -1129,6 +1140,15 @@ struct btrfs_fs_info {
spinlock_t delayed_iput_lock;
struct list_head delayed_iputs;
/* this protects tree_mod_seq_list */
spinlock_t tree_mod_seq_lock;
atomic_t tree_mod_seq;
struct list_head tree_mod_seq_list;
/* this protects tree_mod_log */
rwlock_t tree_mod_log_lock;
struct rb_root tree_mod_log;
atomic_t nr_async_submits;
atomic_t async_submit_draining;
atomic_t nr_async_bios;
@ -1375,7 +1395,7 @@ struct btrfs_root {
struct list_head root_list;
spinlock_t orphan_lock;
struct list_head orphan_list;
atomic_t orphan_inodes;
struct btrfs_block_rsv *orphan_block_rsv;
int orphan_item_inserted;
int orphan_cleanup_state;
@ -1507,6 +1527,12 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_BALANCE_ITEM_KEY 248
/*
* Persistently stores the io stats in the device tree.
* One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid).
*/
#define BTRFS_DEV_STATS_KEY 249
/*
* string items are for debugging. They just store a short string of
* data in the FS
@ -2415,6 +2441,30 @@ static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
return btrfs_item_size(eb, e) - offset;
}
/* btrfs_dev_stats_item */
/*
 * Read slot @index of the values[] array of the dev stats item located at
 * @ptr inside extent buffer @eb and return it.
 *
 * @ptr is used as a byte offset into @eb, not dereferenced.
 * NOTE(review): the 8 bytes are copied as-is; no endian conversion happens
 * here although values[] is declared __le64 -- confirm this is intended.
 */
static inline u64 btrfs_dev_stats_value(struct extent_buffer *eb,
					struct btrfs_dev_stats_item *ptr,
					int index)
{
	unsigned long pos;
	u64 val;

	/* offset of values[index] relative to the start of the buffer */
	pos = offsetof(struct btrfs_dev_stats_item, values) +
	      ((unsigned long)ptr) + (index * sizeof(u64));
	read_extent_buffer(eb, &val, pos, sizeof(val));

	return val;
}
/*
 * Store @val into slot @index of the values[] array of the dev stats item
 * located at @ptr inside extent buffer @eb.
 *
 * @ptr is used as a byte offset into @eb, not dereferenced.
 * NOTE(review): @val is written as-is, without conversion to little
 * endian -- confirm this matches the reader's expectations.
 */
static inline void btrfs_set_dev_stats_value(struct extent_buffer *eb,
					     struct btrfs_dev_stats_item *ptr,
					     int index, u64 val)
{
	unsigned long pos;

	/* offset of values[index] relative to the start of the buffer */
	pos = offsetof(struct btrfs_dev_stats_item, values) +
	      ((unsigned long)ptr) + (index * sizeof(u64));
	write_extent_buffer(eb, &val, pos, sizeof(val));
}
static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
{
return sb->s_fs_info;
@ -2496,11 +2546,11 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize,
u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 hint, u64 empty_size, int for_cow);
u64 hint, u64 empty_size);
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
u64 parent, int last_ref, int for_cow);
u64 parent, int last_ref);
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize,
@ -2659,6 +2709,8 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_key *key, struct btrfs_path *p, int
ins_len, int cow);
int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
struct btrfs_path *p, u64 time_seq);
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *parent,
int start_slot, int cache_only, u64 *last_ret,
@ -3098,4 +3150,23 @@ void btrfs_reada_detach(void *handle);
int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
u64 start, int err);
/*
 * delayed seq elem: one list entry carrying a sequence number.  Callers
 * hand a pointer to it to btrfs_get_tree_mod_seq() and
 * btrfs_put_tree_mod_seq().
 */
struct seq_list {
struct list_head list; /* list linkage */
u64 seq; /* sequence number held by this element */
u32 flags; /* NOTE(review): meaning of the bits is not visible here */
};
void btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
/*
 * Return 1 if @rootid names a file system tree, 0 otherwise.
 *
 * A root id qualifies when it is BTRFS_FS_TREE_OBJECTID itself or when it
 * is at least BTRFS_FIRST_FREE_OBJECTID.  The range check is done on the
 * signed interpretation of both values, so ids with the top bit set are
 * compared as negative numbers.
 */
static inline int is_fstree(u64 rootid)
{
	return rootid == BTRFS_FS_TREE_OBJECTID ||
	       (s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID;
}
#endif

View file

@ -669,8 +669,8 @@ static int btrfs_delayed_inode_reserve_metadata(
return ret;
} else if (src_rsv == &root->fs_info->delalloc_block_rsv) {
spin_lock(&BTRFS_I(inode)->lock);
if (BTRFS_I(inode)->delalloc_meta_reserved) {
BTRFS_I(inode)->delalloc_meta_reserved = 0;
if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags)) {
spin_unlock(&BTRFS_I(inode)->lock);
release = true;
goto migrate;
@ -1706,7 +1706,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
btrfs_set_stack_inode_generation(inode_item,
BTRFS_I(inode)->generation);
btrfs_set_stack_inode_sequence(inode_item, BTRFS_I(inode)->sequence);
btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
btrfs_set_stack_inode_transid(inode_item, trans->transid);
btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
@ -1754,7 +1754,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item);
inode->i_version = btrfs_stack_inode_sequence(inode_item);
inode->i_rdev = 0;
*rdev = btrfs_stack_inode_rdev(inode_item);
BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);

View file

@ -525,7 +525,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
ref->is_head = 0;
ref->in_tree = 1;
if (need_ref_seq(for_cow, ref_root))
if (is_fstree(ref_root))
seq = inc_delayed_seq(delayed_refs);
ref->seq = seq;
@ -584,7 +584,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
ref->is_head = 0;
ref->in_tree = 1;
if (need_ref_seq(for_cow, ref_root))
if (is_fstree(ref_root))
seq = inc_delayed_seq(delayed_refs);
ref->seq = seq;
@ -658,10 +658,11 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action,
for_cow);
if (!need_ref_seq(for_cow, ref_root) &&
if (!is_fstree(ref_root) &&
waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}
@ -706,10 +707,11 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
action, for_cow);
if (!need_ref_seq(for_cow, ref_root) &&
if (!is_fstree(ref_root) &&
waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}

View file

@ -195,11 +195,6 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
struct list_head *cluster, u64 search_start);
/* one list entry carrying a sequence number */
struct seq_list {
struct list_head list; /* list linkage */
u64 seq; /* sequence number held by this element */
};
static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs)
{
assert_spin_locked(&delayed_refs->lock);
@ -229,25 +224,6 @@ btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
u64 seq);
/*
* delayed refs with a ref_seq > 0 must be held back during backref walking.
* this only applies to items in one of the fs-trees. for_cow items never need
* to be held back, so they won't get a ref_seq number.
*/
static inline int need_ref_seq(int for_cow, u64 rootid)
{
	/* for_cow refs never get a sequence number, whatever the tree */
	if (for_cow)
		return 0;

	/*
	 * Otherwise only the fs trees qualify: the default fs tree, or any
	 * root id at or above BTRFS_FIRST_FREE_OBJECTID (signed comparison).
	 */
	return rootid == BTRFS_FS_TREE_OBJECTID ||
	       (s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID;
}
/*
* a node might live in a head or a regular ref, this lets you
* test for the proper type to use.

View file

@ -1153,7 +1153,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->orphan_block_rsv = NULL;
INIT_LIST_HEAD(&root->dirty_list);
INIT_LIST_HEAD(&root->orphan_list);
INIT_LIST_HEAD(&root->root_list);
spin_lock_init(&root->orphan_lock);
spin_lock_init(&root->inode_lock);
@ -1166,6 +1165,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
atomic_set(&root->log_commit[0], 0);
atomic_set(&root->log_commit[1], 0);
atomic_set(&root->log_writers, 0);
atomic_set(&root->orphan_inodes, 0);
root->log_batch = 0;
root->log_transid = 0;
root->last_log_commit = 0;
@ -1252,7 +1252,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
BTRFS_TREE_LOG_OBJECTID, NULL,
0, 0, 0, 0);
0, 0, 0);
if (IS_ERR(leaf)) {
kfree(root);
return ERR_CAST(leaf);
@ -1914,11 +1914,14 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
spin_lock_init(&fs_info->free_chunk_lock);
spin_lock_init(&fs_info->tree_mod_seq_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->reloc_mutex);
init_completion(&fs_info->kobj_unregister);
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
INIT_LIST_HEAD(&fs_info->space_info);
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
btrfs_mapping_init(&fs_info->mapping_tree);
btrfs_init_block_rsv(&fs_info->global_block_rsv);
btrfs_init_block_rsv(&fs_info->delalloc_block_rsv);
@ -1931,12 +1934,14 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->async_submit_draining, 0);
atomic_set(&fs_info->nr_async_bios, 0);
atomic_set(&fs_info->defrag_running, 0);
atomic_set(&fs_info->tree_mod_seq, 0);
fs_info->sb = sb;
fs_info->max_inline = 8192 * 1024;
fs_info->metadata_ratio = 0;
fs_info->defrag_inodes = RB_ROOT;
fs_info->trans_no_join = 0;
fs_info->free_chunk_space = 0;
fs_info->tree_mod_log = RB_ROOT;
/* readahead state */
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
@ -2001,7 +2006,8 @@ int open_ctree(struct super_block *sb,
BTRFS_I(fs_info->btree_inode)->root = tree_root;
memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
sizeof(struct btrfs_key));
BTRFS_I(fs_info->btree_inode)->dummy_inode = 1;
set_bit(BTRFS_INODE_DUMMY,
&BTRFS_I(fs_info->btree_inode)->runtime_flags);
insert_inode_hash(fs_info->btree_inode);
spin_lock_init(&fs_info->block_group_cache_lock);
@ -2353,6 +2359,13 @@ retry_root_backup:
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
ret = btrfs_init_dev_stats(fs_info);
if (ret) {
printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
ret);
goto fail_block_groups;
}
ret = btrfs_init_space_info(fs_info);
if (ret) {
printk(KERN_ERR "Failed to initial space info: %d\n", ret);
@ -2556,18 +2569,19 @@ recovery_tree_root:
static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
char b[BDEVNAME_SIZE];
if (uptodate) {
set_buffer_uptodate(bh);
} else {
struct btrfs_device *device = (struct btrfs_device *)
bh->b_private;
printk_ratelimited(KERN_WARNING "lost page write due to "
"I/O error on %s\n",
bdevname(bh->b_bdev, b));
"I/O error on %s\n", device->name);
/* note, we don't set_buffer_write_io_error because we have
* our own ways of dealing with the IO errors
*/
clear_buffer_uptodate(bh);
btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS);
}
unlock_buffer(bh);
put_bh(bh);
@ -2682,6 +2696,7 @@ static int write_dev_supers(struct btrfs_device *device,
set_buffer_uptodate(bh);
lock_buffer(bh);
bh->b_end_io = btrfs_end_buffer_write_sync;
bh->b_private = device;
}
/*
@ -2740,6 +2755,9 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
}
if (!bio_flagged(bio, BIO_UPTODATE)) {
ret = -EIO;
if (!bio_flagged(bio, BIO_EOPNOTSUPP))
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_FLUSH_ERRS);
}
/* drop the reference from the wait == 0 run */
@ -2902,19 +2920,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
return ret;
}
/*
 * Kill all outstanding I/O.
 *
 * Walks the fs_devices->devices list of @root's filesystem with
 * device_list_mutex held and calls blk_abort_queue() on the request queue
 * of every device's disk.
 */
void btrfs_abort_devices(struct btrfs_root *root)
{
struct list_head *head;
struct btrfs_device *dev;
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
head = &root->fs_info->fs_devices->devices;
/*
 * NOTE(review): dev->bdev is dereferenced without a NULL check --
 * confirm every device on this list has an open block device.
 */
list_for_each_entry_rcu(dev, head, dev_list) {
blk_abort_queue(dev->bdev->bd_disk->queue);
}
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
}
void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
spin_lock(&fs_info->fs_roots_radix_lock);
@ -3671,17 +3676,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
return 0;
}
/*
 * writepage_io_failed_hook for the btree inode: report the failed write
 * through btrfs_error() with -EIO and hand -EIO back to the caller.
 *
 * Only @page (to reach the super block) and @start (for the message) are
 * used; @bio, @end and @state are ignored.
 */
static int btree_writepage_io_failed_hook(struct bio *bio, struct page *page,
					  u64 start, u64 end,
					  struct extent_state *state)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);

	btrfs_error(fs_info, -EIO,
		    "Error occured while writing out btree at %llu", start);

	return -EIO;
}
static struct extent_io_ops btree_extent_io_ops = {
.write_cache_pages_lock_hook = btree_lock_page_hook,
.readpage_end_io_hook = btree_readpage_end_io_hook,
@ -3689,5 +3683,4 @@ static struct extent_io_ops btree_extent_io_ops = {
.submit_bio_hook = btree_submit_bio_hook,
/* note we're sharing with inode.c for the merge bio hook */
.merge_bio_hook = btrfs_merge_bio_hook,
.writepage_io_failed_hook = btree_writepage_io_failed_hook,
};

View file

@ -89,7 +89,6 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
int btrfs_cleanup_transaction(struct btrfs_root *root);
void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
struct btrfs_root *root);
void btrfs_abort_devices(struct btrfs_root *root);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_init_lockdep(void);

View file

@ -3578,7 +3578,7 @@ again:
space_info->chunk_alloc = 0;
spin_unlock(&space_info->lock);
out:
mutex_unlock(&extent_root->fs_info->chunk_mutex);
mutex_unlock(&fs_info->chunk_mutex);
return ret;
}
@ -4355,10 +4355,9 @@ static unsigned drop_outstanding_extent(struct inode *inode)
BTRFS_I(inode)->outstanding_extents--;
if (BTRFS_I(inode)->outstanding_extents == 0 &&
BTRFS_I(inode)->delalloc_meta_reserved) {
test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags))
drop_inode_space = 1;
BTRFS_I(inode)->delalloc_meta_reserved = 0;
}
/*
* If we have more or the same amount of outstanding extents than we have
@ -4465,7 +4464,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
* Add an item to reserve for updating the inode when we complete the
* delalloc io.
*/
if (!BTRFS_I(inode)->delalloc_meta_reserved) {
if (!test_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags)) {
nr_extents++;
extra_reserve = 1;
}
@ -4511,7 +4511,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
spin_lock(&BTRFS_I(inode)->lock);
if (extra_reserve) {
BTRFS_I(inode)->delalloc_meta_reserved = 1;
set_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
&BTRFS_I(inode)->runtime_flags);
nr_extents--;
}
BTRFS_I(inode)->reserved_extents += nr_extents;
@ -5217,7 +5218,7 @@ out:
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
u64 parent, int last_ref, int for_cow)
u64 parent, int last_ref)
{
struct btrfs_block_group_cache *cache = NULL;
int ret;
@ -5227,7 +5228,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
buf->start, buf->len,
parent, root->root_key.objectid,
btrfs_header_level(buf),
BTRFS_DROP_DELAYED_REF, NULL, for_cow);
BTRFS_DROP_DELAYED_REF, NULL, 0);
BUG_ON(ret); /* -ENOMEM */
}
@ -6249,7 +6250,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize,
u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 hint, u64 empty_size, int for_cow)
u64 hint, u64 empty_size)
{
struct btrfs_key ins;
struct btrfs_block_rsv *block_rsv;
@ -6297,7 +6298,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
ins.objectid,
ins.offset, parent, root_objectid,
level, BTRFS_ADD_DELAYED_EXTENT,
extent_op, for_cow);
extent_op, 0);
BUG_ON(ret); /* -ENOMEM */
}
return buf;
@ -6715,7 +6716,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
btrfs_header_owner(path->nodes[level + 1]));
}
btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1, 0);
btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
out:
wc->refs[level] = 0;
wc->flags[level] = 0;

View file

@ -186,7 +186,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
return parent;
}
entry = rb_entry(node, struct tree_entry, rb_node);
rb_link_node(node, parent, p);
rb_insert_color(node, root);
return NULL;
@ -413,7 +412,7 @@ static struct extent_state *next_state(struct extent_state *state)
/*
* utility function to clear some bits in an extent state struct.
* it will optionally wake up any one waiting on this state (wake == 1)
* it will optionally wake up any one waiting on this state (wake == 1).
*
* If no bits are set on the state struct after clearing things, the
* struct is freed and removed from the tree
@ -570,10 +569,8 @@ hit_next:
if (err)
goto out;
if (state->end <= end) {
clear_state_bit(tree, state, &bits, wake);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
state = clear_state_bit(tree, state, &bits, wake);
goto next;
}
goto search_again;
}
@ -781,7 +778,6 @@ hit_next:
* Just lock what we found and keep going
*/
if (state->start == start && state->end <= end) {
struct rb_node *next_node;
if (state->state & exclusive_bits) {
*failed_start = state->start;
err = -EEXIST;
@ -789,20 +785,15 @@ hit_next:
}
set_state_bits(tree, state, &bits);
cache_state(state, cached_state);
merge_state(tree, state);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
next_node = rb_next(&state->rb_node);
if (next_node && start < end && prealloc && !need_resched()) {
state = rb_entry(next_node, struct extent_state,
rb_node);
if (state->start == start)
goto hit_next;
}
state = next_state(state);
if (start < end && state && state->start == start &&
!need_resched())
goto hit_next;
goto search_again;
}
@ -845,6 +836,10 @@ hit_next:
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
state = next_state(state);
if (start < end && state && state->start == start &&
!need_resched())
goto hit_next;
}
goto search_again;
}
@ -994,21 +989,14 @@ hit_next:
* Just lock what we found and keep going
*/
if (state->start == start && state->end <= end) {
struct rb_node *next_node;
set_state_bits(tree, state, &bits);
clear_state_bit(tree, state, &clear_bits, 0);
state = clear_state_bit(tree, state, &clear_bits, 0);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
next_node = rb_next(&state->rb_node);
if (next_node && start < end && prealloc && !need_resched()) {
state = rb_entry(next_node, struct extent_state,
rb_node);
if (state->start == start)
goto hit_next;
}
if (start < end && state && state->start == start &&
!need_resched())
goto hit_next;
goto search_again;
}
@ -1042,10 +1030,13 @@ hit_next:
goto out;
if (state->end <= end) {
set_state_bits(tree, state, &bits);
clear_state_bit(tree, state, &clear_bits, 0);
state = clear_state_bit(tree, state, &clear_bits, 0);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
if (start < end && state && state->start == start &&
!need_resched())
goto hit_next;
}
goto search_again;
}
@ -1173,9 +1164,8 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
cached_state, mask);
}
static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state,
gfp_t mask)
int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask)
{
return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
cached_state, mask);
@ -1293,7 +1283,7 @@ out:
* returned if we find something, and *start_ret and *end_ret are
* set to reflect the state struct that was found.
*
* If nothing was found, 1 is returned, < 0 on error
* If nothing was found, 1 is returned. If found something, return 0.
*/
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, int bits)
@ -1923,6 +1913,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
/* try to remap that extent elsewhere? */
bio_put(bio);
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
return -EIO;
}
@ -2222,17 +2213,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
uptodate = 0;
}
if (!uptodate && tree->ops &&
tree->ops->writepage_io_failed_hook) {
ret = tree->ops->writepage_io_failed_hook(NULL, page,
start, end, NULL);
/* Writeback already completed */
if (ret == 0)
return 1;
}
if (!uptodate) {
clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
ClearPageUptodate(page);
SetPageError(page);
}
@ -2347,10 +2328,23 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
ret = tree->ops->readpage_end_io_hook(page, start, end,
state, mirror);
if (ret)
if (ret) {
/* no IO indicated but software detected errors
* in the block, either checksum errors or
* issues with the contents */
struct btrfs_root *root =
BTRFS_I(page->mapping->host)->root;
struct btrfs_device *device;
uptodate = 0;
else
device = btrfs_find_device_for_logical(
root, start, mirror);
if (device)
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
} else {
clean_io_failure(start, page);
}
}
if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
@ -3164,7 +3158,7 @@ static int write_one_eb(struct extent_buffer *eb,
u64 offset = eb->start;
unsigned long i, num_pages;
int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
int ret;
int ret = 0;
clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
num_pages = num_extent_pages(eb->start, eb->len);
@ -3930,6 +3924,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
eb->start = start;
eb->len = len;
eb->tree = tree;
eb->bflags = 0;
rwlock_init(&eb->lock);
atomic_set(&eb->write_locks, 0);
atomic_set(&eb->read_locks, 0);
@ -3967,6 +3962,60 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
return eb;
}
/*
 * Build a private copy of @src.
 *
 * A new extent buffer with the same start and length is allocated without
 * a tree (NULL), backed by freshly allocated pages, filled with the full
 * contents of @src and flagged EXTENT_BUFFER_UPTODATE and
 * EXTENT_BUFFER_DUMMY.
 *
 * Returns the copy, or NULL if the extent buffer itself could not be
 * allocated.
 */
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
{
	unsigned long nr = num_extent_pages(src->start, src->len);
	struct extent_buffer *clone;
	unsigned long idx;

	clone = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC);
	if (!clone)
		return NULL;

	for (idx = 0; idx < nr; idx++) {
		struct page *page = alloc_page(GFP_ATOMIC);

		/*
		 * NOTE(review): a failed atomic page allocation is fatal
		 * here rather than reported to the caller.
		 */
		BUG_ON(!page);
		attach_extent_buffer_page(clone, page);
		WARN_ON(PageDirty(page));
		SetPageUptodate(page);
		clone->pages[idx] = page;
	}

	copy_extent_buffer(clone, src, 0, 0, src->len);
	set_bit(EXTENT_BUFFER_UPTODATE, &clone->bflags);
	set_bit(EXTENT_BUFFER_DUMMY, &clone->bflags);

	return clone;
}
/*
 * Allocate an extent buffer that belongs to no extent_io_tree.
 *
 * The buffer is created with a NULL tree, backed by freshly allocated
 * pages, marked uptodate, given a header with zero items and flagged
 * EXTENT_BUFFER_DUMMY.
 *
 * @start: start offset recorded in the buffer
 * @len:   length of the buffer in bytes; the page count is derived from it
 *
 * Returns the new buffer, or NULL if the buffer or any of its pages could
 * not be allocated.  On failure nothing stays allocated.
 */
struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
{
	struct extent_buffer *eb;
	unsigned long num_pages = num_extent_pages(0, len);
	unsigned long i;

	eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC);
	if (!eb)
		return NULL;

	for (i = 0; i < num_pages; i++) {
		eb->pages[i] = alloc_page(GFP_ATOMIC);
		if (!eb->pages[i])
			goto err;
	}
	set_extent_buffer_uptodate(eb);
	btrfs_set_header_nritems(eb, 0);
	set_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);

	return eb;
err:
	/*
	 * Pages [0, i) were allocated and pages[i] is the one that failed.
	 * Free exactly that range: counting down with "i > 0" before the
	 * decrement releases page 0 as well and cannot wrap the unsigned
	 * index when the very first allocation failed (i == 0).
	 */
	while (i > 0) {
		i--;
		__free_page(eb->pages[i]);
	}
	__free_extent_buffer(eb);
	return NULL;
}
static int extent_buffer_under_io(struct extent_buffer *eb)
{
return (atomic_read(&eb->io_pages) ||
@ -3981,18 +4030,21 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
unsigned long start_idx)
{
unsigned long index;
unsigned long num_pages;
struct page *page;
int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
BUG_ON(extent_buffer_under_io(eb));
index = num_extent_pages(eb->start, eb->len);
num_pages = num_extent_pages(eb->start, eb->len);
index = start_idx + num_pages;
if (start_idx >= index)
return;
do {
index--;
page = extent_buffer_page(eb, index);
if (page) {
if (page && mapped) {
spin_lock(&page->mapping->private_lock);
/*
* We do this since we'll remove the pages after we've
@ -4017,6 +4069,8 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
}
spin_unlock(&page->mapping->private_lock);
}
if (page) {
/* One for when we alloced the page */
page_cache_release(page);
}
@ -4235,14 +4289,18 @@ static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
{
WARN_ON(atomic_read(&eb->refs) == 0);
if (atomic_dec_and_test(&eb->refs)) {
struct extent_io_tree *tree = eb->tree;
if (test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) {
spin_unlock(&eb->refs_lock);
} else {
struct extent_io_tree *tree = eb->tree;
spin_unlock(&eb->refs_lock);
spin_unlock(&eb->refs_lock);
spin_lock(&tree->buffer_lock);
radix_tree_delete(&tree->buffer,
eb->start >> PAGE_CACHE_SHIFT);
spin_unlock(&tree->buffer_lock);
spin_lock(&tree->buffer_lock);
radix_tree_delete(&tree->buffer,
eb->start >> PAGE_CACHE_SHIFT);
spin_unlock(&tree->buffer_lock);
}
/* Should be safe to release our pages at this point */
btrfs_release_extent_buffer_page(eb, 0);
@ -4259,6 +4317,10 @@ void free_extent_buffer(struct extent_buffer *eb)
return;
spin_lock(&eb->refs_lock);
if (atomic_read(&eb->refs) == 2 &&
test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))
atomic_dec(&eb->refs);
if (atomic_read(&eb->refs) == 2 &&
test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
!extent_buffer_under_io(eb) &&

View file

@ -39,6 +39,7 @@
#define EXTENT_BUFFER_STALE 6
#define EXTENT_BUFFER_WRITEBACK 7
#define EXTENT_BUFFER_IOERR 8
#define EXTENT_BUFFER_DUMMY 9
/* these are flags for extent_clear_unlock_delalloc */
#define EXTENT_CLEAR_UNLOCK_PAGE 0x1
@ -75,9 +76,6 @@ struct extent_io_ops {
unsigned long bio_flags);
int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,
u64 start, u64 end,
struct extent_state *state);
int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int mirror);
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
@ -225,6 +223,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
@ -265,6 +265,8 @@ void set_page_extent_mapped(struct page *page);
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
u64 start, unsigned long len);
struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len);
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
u64 start, unsigned long len);
void free_extent_buffer(struct extent_buffer *eb);

View file

@ -65,6 +65,21 @@ struct inode_defrag {
int cycled;
};
/*
 * Three-way comparison of two defrag records, ordered first by root and
 * then by inode number.
 *
 * Returns 1 if @defrag1 sorts after @defrag2, -1 if it sorts before, and
 * 0 if both root and ino are equal.
 */
static int __compare_inode_defrag(struct inode_defrag *defrag1,
				  struct inode_defrag *defrag2)
{
	if (defrag1->root != defrag2->root)
		return defrag1->root > defrag2->root ? 1 : -1;

	if (defrag1->ino != defrag2->ino)
		return defrag1->ino > defrag2->ino ? 1 : -1;

	return 0;
}
/* pop a record for an inode into the defrag tree. The lock
* must be held already
*
@ -81,15 +96,17 @@ static void __btrfs_add_inode_defrag(struct inode *inode,
struct inode_defrag *entry;
struct rb_node **p;
struct rb_node *parent = NULL;
int ret;
p = &root->fs_info->defrag_inodes.rb_node;
while (*p) {
parent = *p;
entry = rb_entry(parent, struct inode_defrag, rb_node);
if (defrag->ino < entry->ino)
ret = __compare_inode_defrag(defrag, entry);
if (ret < 0)
p = &parent->rb_left;
else if (defrag->ino > entry->ino)
else if (ret > 0)
p = &parent->rb_right;
else {
/* if we're reinserting an entry for
@ -103,7 +120,7 @@ static void __btrfs_add_inode_defrag(struct inode *inode,
goto exists;
}
}
BTRFS_I(inode)->in_defrag = 1;
set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
rb_link_node(&defrag->rb_node, parent, p);
rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
return;
@ -131,7 +148,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
if (btrfs_fs_closing(root->fs_info))
return 0;
if (BTRFS_I(inode)->in_defrag)
if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
return 0;
if (trans)
@ -148,7 +165,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
defrag->root = root->root_key.objectid;
spin_lock(&root->fs_info->defrag_inodes_lock);
if (!BTRFS_I(inode)->in_defrag)
if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
__btrfs_add_inode_defrag(inode, defrag);
else
kfree(defrag);
@ -159,28 +176,35 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
/*
* must be called with the defrag_inodes lock held
*/
struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, u64 ino,
struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
u64 root, u64 ino,
struct rb_node **next)
{
struct inode_defrag *entry = NULL;
struct inode_defrag tmp;
struct rb_node *p;
struct rb_node *parent = NULL;
int ret;
tmp.ino = ino;
tmp.root = root;
p = info->defrag_inodes.rb_node;
while (p) {
parent = p;
entry = rb_entry(parent, struct inode_defrag, rb_node);
if (ino < entry->ino)
ret = __compare_inode_defrag(&tmp, entry);
if (ret < 0)
p = parent->rb_left;
else if (ino > entry->ino)
else if (ret > 0)
p = parent->rb_right;
else
return entry;
}
if (next) {
while (parent && ino > entry->ino) {
while (parent && __compare_inode_defrag(&tmp, entry) > 0) {
parent = rb_next(parent);
entry = rb_entry(parent, struct inode_defrag, rb_node);
}
@ -202,6 +226,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
struct btrfs_key key;
struct btrfs_ioctl_defrag_range_args range;
u64 first_ino = 0;
u64 root_objectid = 0;
int num_defrag;
int defrag_batch = 1024;
@ -214,11 +239,14 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
n = NULL;
/* find an inode to defrag */
defrag = btrfs_find_defrag_inode(fs_info, first_ino, &n);
defrag = btrfs_find_defrag_inode(fs_info, root_objectid,
first_ino, &n);
if (!defrag) {
if (n)
defrag = rb_entry(n, struct inode_defrag, rb_node);
else if (first_ino) {
if (n) {
defrag = rb_entry(n, struct inode_defrag,
rb_node);
} else if (root_objectid || first_ino) {
root_objectid = 0;
first_ino = 0;
continue;
} else {
@ -228,6 +256,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
/* remove it from the rbtree */
first_ino = defrag->ino + 1;
root_objectid = defrag->root;
rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
if (btrfs_fs_closing(fs_info))
@ -252,7 +281,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
goto next;
/* do a chunk of defrag */
BTRFS_I(inode)->in_defrag = 0;
clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
range.start = defrag->last_offset;
num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
defrag_batch);
@ -1409,7 +1438,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
mutex_unlock(&inode->i_mutex);
goto out;
}
BTRFS_I(inode)->sequence++;
start_pos = round_down(pos, root->sectorsize);
if (start_pos > i_size_read(inode)) {
@ -1466,8 +1494,8 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
* flush down new bytes that may have been written if the
* application were using truncate to replace a file in place.
*/
if (BTRFS_I(inode)->ordered_data_close) {
BTRFS_I(inode)->ordered_data_close = 0;
if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
&BTRFS_I(inode)->runtime_flags)) {
btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
filemap_flush(inode->i_mapping);
@ -1498,14 +1526,15 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
trace_btrfs_sync_file(file, datasync);
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret)
return ret;
mutex_lock(&inode->i_mutex);
/* we wait first, since the writeback may change the inode */
/*
* we wait first, since the writeback may change the inode, also wait
* ordered range does a filemap_write_and_wait_range which is why we
* don't do it above like other file systems.
*/
root->log_batch++;
btrfs_wait_ordered_range(inode, 0, (u64)-1);
btrfs_wait_ordered_range(inode, start, end);
root->log_batch++;
/*
@ -1523,7 +1552,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* syncing
*/
smp_mb();
if (BTRFS_I(inode)->last_trans <=
if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
BTRFS_I(inode)->last_trans <=
root->fs_info->last_trans_committed) {
BTRFS_I(inode)->last_trans = 0;
mutex_unlock(&inode->i_mutex);

View file

@ -33,6 +33,8 @@
static int link_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info);
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info);
static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
struct btrfs_path *path,
@ -584,6 +586,44 @@ static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
return 0;
}
/*
 * Coalesce adjacent extent entries of a freshly loaded free space cache.
 *
 * Pinned extents are attached after the fact, so one contiguous range of free
 * space can end up split across several entries.  The tree logging code may
 * have allocated across what looks like two entries here, because those
 * entries would have been merged when the pinned extents were added back to
 * the free space cache.  Merging contiguous entries after load keeps log
 * replay from blowing up and gives the allocator nicer ranges to work with.
 *
 * Bitmap entries are never merged.  After every successful merge the tree
 * lock is dropped and the walk restarts from the first entry.
 */
static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *cur, *last;
	struct rb_node *node;
	bool merged;

	do {
		merged = false;
		last = NULL;

		spin_lock(&ctl->tree_lock);
		for (node = rb_first(&ctl->free_space_offset); node;
		     node = rb_next(node)) {
			cur = rb_entry(node, struct btrfs_free_space,
				       offset_index);

			/* Only two back-to-back plain extents can be joined. */
			if (last && !cur->bitmap && !last->bitmap &&
			    last->offset + last->bytes == cur->offset) {
				unlink_free_space(ctl, last);
				unlink_free_space(ctl, cur);
				last->bytes += cur->bytes;
				kmem_cache_free(btrfs_free_space_cachep, cur);
				link_free_space(ctl, last);
				/* The tree changed under us: rescan from the start. */
				merged = true;
				break;
			}
			last = cur;
		}
		spin_unlock(&ctl->tree_lock);
	} while (merged);
}
int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_space_ctl *ctl,
struct btrfs_path *path, u64 offset)
@ -726,6 +766,7 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
}
io_ctl_drop_pages(&io_ctl);
merge_space_tree(ctl);
ret = 1;
out:
io_ctl_free(&io_ctl);
@ -972,9 +1013,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
goto out;
ret = filemap_write_and_wait(inode->i_mapping);
if (ret)
goto out;
btrfs_wait_ordered_range(inode, 0, (u64)-1);
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
key.offset = offset;

View file

@ -89,7 +89,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
static int btrfs_setsize(struct inode *inode, loff_t newsize);
static int btrfs_truncate(struct inode *inode);
static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct inode *inode,
struct page *locked_page,
u64 start, u64 end, int *page_started,
@ -257,10 +257,13 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
ret = insert_inline_extent(trans, root, inode, start,
inline_len, compressed_size,
compress_type, compressed_pages);
if (ret) {
if (ret && ret != -ENOSPC) {
btrfs_abort_transaction(trans, root, ret);
return ret;
} else if (ret == -ENOSPC) {
return 1;
}
btrfs_delalloc_release_metadata(inode, end + 1 - start);
btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
return 0;
@ -1572,11 +1575,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
if (btrfs_is_free_space_inode(root, inode))
metadata = 2;
ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
if (ret)
return ret;
if (!(rw & REQ_WRITE)) {
ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
if (ret)
return ret;
if (bio_flags & EXTENT_BIO_COMPRESSED) {
return btrfs_submit_compressed_read(inode, bio,
mirror_num, bio_flags);
@ -1815,25 +1818,24 @@ out:
* an ordered extent if the range of bytes in the file it covers are
* fully written.
*/
static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
{
struct inode *inode = ordered_extent->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans = NULL;
struct btrfs_ordered_extent *ordered_extent = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_state *cached_state = NULL;
int compress_type = 0;
int ret;
bool nolock;
ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
end - start + 1);
if (!ret)
return 0;
BUG_ON(!ordered_extent); /* Logic error */
nolock = btrfs_is_free_space_inode(root, inode);
if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
ret = -EIO;
goto out;
}
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
@ -1889,12 +1891,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ordered_extent->file_offset,
ordered_extent->len);
}
unlock_extent_cached(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset +
ordered_extent->len - 1, &cached_state, GFP_NOFS);
if (ret < 0) {
btrfs_abort_transaction(trans, root, ret);
goto out;
goto out_unlock;
}
add_pending_csums(trans, inode, ordered_extent->file_offset,
@ -1905,10 +1905,14 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ret = btrfs_update_inode_fallback(trans, root, inode);
if (ret) { /* -ENOMEM or corruption */
btrfs_abort_transaction(trans, root, ret);
goto out;
goto out_unlock;
}
}
ret = 0;
out_unlock:
unlock_extent_cached(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset +
ordered_extent->len - 1, &cached_state, GFP_NOFS);
out:
if (root != root->fs_info->tree_root)
btrfs_delalloc_release_metadata(inode, ordered_extent->len);
@ -1919,26 +1923,57 @@ out:
btrfs_end_transaction(trans, root);
}
if (ret)
clear_extent_uptodate(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset +
ordered_extent->len - 1, NULL, GFP_NOFS);
/*
* This needs to be done to make sure anybody waiting knows we are done
* updating everything for this ordered extent.
*/
btrfs_remove_ordered_extent(inode, ordered_extent);
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
/* once for the tree */
btrfs_put_ordered_extent(ordered_extent);
return 0;
out_unlock:
unlock_extent_cached(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset +
ordered_extent->len - 1, &cached_state, GFP_NOFS);
goto out;
return ret;
}
/*
 * Worker callback installed as the ->work.func of an ordered extent: recover
 * the ordered extent that embeds @work and run its completion.  The return
 * value of btrfs_finish_ordered_io() is not propagated.
 */
static void finish_ordered_fn(struct btrfs_work *work)
{
	btrfs_finish_ordered_io(container_of(work,
					     struct btrfs_ordered_extent,
					     work));
}
static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate)
{
struct inode *inode = page->mapping->host;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ordered_extent *ordered_extent = NULL;
struct btrfs_workers *workers;
trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
ClearPagePrivate2(page);
return btrfs_finish_ordered_io(page->mapping->host, start, end);
if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
end - start + 1, uptodate))
return 0;
ordered_extent->work.func = finish_ordered_fn;
ordered_extent->work.flags = 0;
if (btrfs_is_free_space_inode(root, inode))
workers = &root->fs_info->endio_freespace_worker;
else
workers = &root->fs_info->endio_write_workers;
btrfs_queue_worker(workers, &ordered_extent->work);
return 0;
}
/*
@ -2072,12 +2107,12 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
struct btrfs_block_rsv *block_rsv;
int ret;
if (!list_empty(&root->orphan_list) ||
if (atomic_read(&root->orphan_inodes) ||
root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
return;
spin_lock(&root->orphan_lock);
if (!list_empty(&root->orphan_list)) {
if (atomic_read(&root->orphan_inodes)) {
spin_unlock(&root->orphan_lock);
return;
}
@ -2134,8 +2169,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
block_rsv = NULL;
}
if (list_empty(&BTRFS_I(inode)->i_orphan)) {
list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags)) {
#if 0
/*
* For proper ENOSPC handling, we should do orphan
@ -2148,12 +2183,12 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
insert = 1;
#endif
insert = 1;
atomic_dec(&root->orphan_inodes);
}
if (!BTRFS_I(inode)->orphan_meta_reserved) {
BTRFS_I(inode)->orphan_meta_reserved = 1;
if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
&BTRFS_I(inode)->runtime_flags))
reserve = 1;
}
spin_unlock(&root->orphan_lock);
/* grab metadata reservation from transaction handle */
@ -2166,6 +2201,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
if (insert >= 1) {
ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
if (ret && ret != -EEXIST) {
clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags);
btrfs_abort_transaction(trans, root, ret);
return ret;
}
@ -2196,15 +2233,13 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
int ret = 0;
spin_lock(&root->orphan_lock);
if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
list_del_init(&BTRFS_I(inode)->i_orphan);
if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags))
delete_item = 1;
}
if (BTRFS_I(inode)->orphan_meta_reserved) {
BTRFS_I(inode)->orphan_meta_reserved = 0;
if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
&BTRFS_I(inode)->runtime_flags))
release_rsv = 1;
}
spin_unlock(&root->orphan_lock);
if (trans && delete_item) {
@ -2212,8 +2247,10 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
}
if (release_rsv)
if (release_rsv) {
btrfs_orphan_release_metadata(inode);
atomic_dec(&root->orphan_inodes);
}
return 0;
}
@ -2341,6 +2378,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
ret = PTR_ERR(trans);
goto out;
}
printk(KERN_ERR "auto deleting %Lu\n",
found_key.objectid);
ret = btrfs_del_orphan_item(trans, root,
found_key.objectid);
BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
@ -2352,9 +2391,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
* add this inode to the orphan list so btrfs_orphan_del does
* the proper thing when we hit it
*/
spin_lock(&root->orphan_lock);
list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
spin_unlock(&root->orphan_lock);
set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags);
/* if we have links, this was a truncate, lets do that */
if (inode->i_nlink) {
@ -2510,7 +2548,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
BTRFS_I(inode)->sequence = btrfs_inode_sequence(leaf, inode_item);
inode->i_version = btrfs_inode_sequence(leaf, inode_item);
inode->i_generation = BTRFS_I(inode)->generation;
inode->i_rdev = 0;
rdev = btrfs_inode_rdev(leaf, inode_item);
@ -2594,7 +2632,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode));
btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation);
btrfs_set_inode_sequence(leaf, item, BTRFS_I(inode)->sequence);
btrfs_set_inode_sequence(leaf, item, inode->i_version);
btrfs_set_inode_transid(leaf, item, trans->transid);
btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
@ -2752,6 +2790,8 @@ err:
goto out;
btrfs_i_size_write(dir, dir->i_size - name_len * 2);
inode_inc_iversion(inode);
inode_inc_iversion(dir);
inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
btrfs_update_inode(trans, root, dir);
out:
@ -3089,6 +3129,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
}
btrfs_i_size_write(dir, dir->i_size - name_len * 2);
inode_inc_iversion(dir);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
ret = btrfs_update_inode(trans, root, dir);
if (ret)
@ -3607,7 +3648,8 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
* any new writes get down to disk quickly.
*/
if (newsize == 0)
BTRFS_I(inode)->ordered_data_close = 1;
set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
&BTRFS_I(inode)->runtime_flags);
/* we don't support swapfiles, so vmtruncate shouldn't fail */
truncate_setsize(inode, newsize);
@ -3638,6 +3680,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_valid) {
setattr_copy(inode, attr);
inode_inc_iversion(inode);
err = btrfs_dirty_inode(inode);
if (!err && attr->ia_valid & ATTR_MODE)
@ -3671,7 +3714,8 @@ void btrfs_evict_inode(struct inode *inode)
btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (root->fs_info->log_root_recovering) {
BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan));
BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags));
goto no_delete;
}
@ -4066,7 +4110,7 @@ static struct inode *new_simple_dir(struct super_block *s,
BTRFS_I(inode)->root = root;
memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
BTRFS_I(inode)->dummy_inode = 1;
set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
inode->i_op = &btrfs_dir_ro_inode_operations;
@ -4370,7 +4414,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
int ret = 0;
bool nolock = false;
if (BTRFS_I(inode)->dummy_inode)
if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
return 0;
if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode))
@ -4403,7 +4447,7 @@ int btrfs_dirty_inode(struct inode *inode)
struct btrfs_trans_handle *trans;
int ret;
if (BTRFS_I(inode)->dummy_inode)
if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
return 0;
trans = btrfs_join_transaction(root);
@ -4730,6 +4774,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
btrfs_i_size_write(parent_inode, parent_inode->i_size +
name_len * 2);
inode_inc_iversion(parent_inode);
parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
ret = btrfs_update_inode(trans, root, parent_inode);
if (ret)
@ -4937,6 +4982,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
}
btrfs_inc_nlink(inode);
inode_inc_iversion(inode);
inode->i_ctime = CURRENT_TIME;
ihold(inode);
@ -5903,9 +5949,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
struct btrfs_dio_private *dip = bio->bi_private;
struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
struct btrfs_ordered_extent *ordered = NULL;
struct extent_state *cached_state = NULL;
u64 ordered_offset = dip->logical_offset;
u64 ordered_bytes = dip->bytes;
int ret;
@ -5915,73 +5959,14 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
again:
ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
&ordered_offset,
ordered_bytes);
ordered_bytes, !err);
if (!ret)
goto out_test;
BUG_ON(!ordered);
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
err = -ENOMEM;
goto out;
}
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
ret = btrfs_ordered_update_i_size(inode, 0, ordered);
if (!ret)
err = btrfs_update_inode_fallback(trans, root, inode);
goto out;
}
lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset,
ordered->file_offset + ordered->len - 1, 0,
&cached_state);
if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
ret = btrfs_mark_extent_written(trans, inode,
ordered->file_offset,
ordered->file_offset +
ordered->len);
if (ret) {
err = ret;
goto out_unlock;
}
} else {
ret = insert_reserved_file_extent(trans, inode,
ordered->file_offset,
ordered->start,
ordered->disk_len,
ordered->len,
ordered->len,
0, 0, 0,
BTRFS_FILE_EXTENT_REG);
unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
ordered->file_offset, ordered->len);
if (ret) {
err = ret;
WARN_ON(1);
goto out_unlock;
}
}
add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
ret = btrfs_ordered_update_i_size(inode, 0, ordered);
if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags))
btrfs_update_inode_fallback(trans, root, inode);
ret = 0;
out_unlock:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
ordered->file_offset + ordered->len - 1,
&cached_state, GFP_NOFS);
out:
btrfs_delalloc_release_metadata(inode, ordered->len);
btrfs_end_transaction(trans, root);
ordered_offset = ordered->file_offset + ordered->len;
btrfs_put_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
ordered->work.func = finish_ordered_fn;
ordered->work.flags = 0;
btrfs_queue_worker(&root->fs_info->endio_write_workers,
&ordered->work);
out_test:
/*
* our bio might span multiple ordered extents. If we haven't
@ -5990,12 +5975,12 @@ out_test:
if (ordered_offset < dip->logical_offset + dip->bytes) {
ordered_bytes = dip->logical_offset + dip->bytes -
ordered_offset;
ordered = NULL;
goto again;
}
out_done:
bio->bi_private = dip->private;
kfree(dip->csums);
kfree(dip);
/* If we had an error make sure to clear the uptodate flag */
@ -6063,9 +6048,12 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
int ret;
bio_get(bio);
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
if (ret)
goto err;
if (!write) {
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
if (ret)
goto err;
}
if (skip_sum)
goto map;
@ -6485,13 +6473,13 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
static void btrfs_invalidatepage(struct page *page, unsigned long offset)
{
struct inode *inode = page->mapping->host;
struct extent_io_tree *tree;
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
u64 page_start = page_offset(page);
u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
/*
* we have the page locked, so new writeback can't start,
* and the dirty bit won't be cleared while we are here.
@ -6501,13 +6489,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
*/
wait_on_page_writeback(page);
tree = &BTRFS_I(page->mapping->host)->io_tree;
tree = &BTRFS_I(inode)->io_tree;
if (offset) {
btrfs_releasepage(page, GFP_NOFS);
return;
}
lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
ordered = btrfs_lookup_ordered_extent(page->mapping->host,
ordered = btrfs_lookup_ordered_extent(inode,
page_offset(page));
if (ordered) {
/*
@ -6522,9 +6510,10 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
* whoever cleared the private bit is responsible
* for the finish_ordered_io
*/
if (TestClearPagePrivate2(page)) {
btrfs_finish_ordered_io(page->mapping->host,
page_start, page_end);
if (TestClearPagePrivate2(page) &&
btrfs_dec_test_ordered_pending(inode, &ordered, page_start,
PAGE_CACHE_SIZE, 1)) {
btrfs_finish_ordered_io(ordered);
}
btrfs_put_ordered_extent(ordered);
cached_state = NULL;
@ -6771,7 +6760,8 @@ static int btrfs_truncate(struct inode *inode)
* using truncate to replace the contents of the file will
* end up with a zero length file after a crash.
*/
if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
&BTRFS_I(inode)->runtime_flags))
btrfs_add_ordered_operation(trans, root, inode);
while (1) {
@ -6894,7 +6884,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->root = NULL;
ei->space_info = NULL;
ei->generation = 0;
ei->sequence = 0;
ei->last_trans = 0;
ei->last_sub_trans = 0;
ei->logged_trans = 0;
@ -6909,11 +6898,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->outstanding_extents = 0;
ei->reserved_extents = 0;
ei->ordered_data_close = 0;
ei->orphan_meta_reserved = 0;
ei->dummy_inode = 0;
ei->in_defrag = 0;
ei->delalloc_meta_reserved = 0;
ei->runtime_flags = 0;
ei->force_compress = BTRFS_COMPRESS_NONE;
ei->delayed_node = NULL;
@ -6927,7 +6912,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
mutex_init(&ei->log_mutex);
mutex_init(&ei->delalloc_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
INIT_LIST_HEAD(&ei->i_orphan);
INIT_LIST_HEAD(&ei->delalloc_inodes);
INIT_LIST_HEAD(&ei->ordered_operations);
RB_CLEAR_NODE(&ei->rb_node);
@ -6972,13 +6956,12 @@ void btrfs_destroy_inode(struct inode *inode)
spin_unlock(&root->fs_info->ordered_extent_lock);
}
spin_lock(&root->orphan_lock);
if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags)) {
printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
(unsigned long long)btrfs_ino(inode));
list_del_init(&BTRFS_I(inode)->i_orphan);
atomic_dec(&root->orphan_inodes);
}
spin_unlock(&root->orphan_lock);
while (1) {
ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
@ -7193,6 +7176,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
btrfs_add_ordered_operation(trans, root, old_inode);
inode_inc_iversion(old_dir);
inode_inc_iversion(new_dir);
inode_inc_iversion(old_inode);
old_dir->i_ctime = old_dir->i_mtime = ctime;
new_dir->i_ctime = new_dir->i_mtime = ctime;
old_inode->i_ctime = ctime;
@ -7219,6 +7205,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
if (new_inode) {
inode_inc_iversion(new_inode);
new_inode->i_ctime = CURRENT_TIME;
if (unlikely(btrfs_ino(new_inode) ==
BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
@ -7490,6 +7477,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
cur_offset += ins.offset;
*alloc_hint = ins.objectid + ins.offset;
inode_inc_iversion(inode);
inode->i_ctime = CURRENT_TIME;
BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
if (!(mode & FALLOC_FL_KEEP_SIZE) &&

View file

@ -261,6 +261,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
}
btrfs_update_iflags(inode);
inode_inc_iversion(inode);
inode->i_ctime = CURRENT_TIME;
ret = btrfs_update_inode(trans, root, inode);
@ -367,7 +368,7 @@ static noinline int create_subvol(struct btrfs_root *root,
return PTR_ERR(trans);
leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
0, objectid, NULL, 0, 0, 0, 0);
0, objectid, NULL, 0, 0, 0);
if (IS_ERR(leaf)) {
ret = PTR_ERR(leaf);
goto fail;
@ -2262,10 +2263,12 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
di_args->bytes_used = dev->bytes_used;
di_args->total_bytes = dev->total_bytes;
memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
if (dev->name)
if (dev->name) {
strncpy(di_args->path, dev->name, sizeof(di_args->path));
else
di_args->path[sizeof(di_args->path) - 1] = 0;
} else {
di_args->path[0] = '\0';
}
out:
if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
@ -2622,6 +2625,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
inode_inc_iversion(inode);
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
/*
@ -2914,7 +2918,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
up_read(&info->groups_sem);
}
user_dest = (struct btrfs_ioctl_space_info *)
user_dest = (struct btrfs_ioctl_space_info __user *)
(arg + sizeof(struct btrfs_ioctl_space_args));
if (copy_to_user(user_dest, dest_orig, alloc_size))
@ -3042,6 +3046,28 @@ static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
return ret;
}
/*
 * Handler for the device statistics ioctls.
 *
 * @root:             root handed through to btrfs_get_dev_stats()
 * @arg:              user pointer to a struct btrfs_ioctl_get_dev_stats; it is
 *                    copied in, filled by btrfs_get_dev_stats() and copied
 *                    back out
 * @reset_after_read: non-zero to also reset the counters after reading them;
 *                    this requires CAP_SYS_ADMIN
 *
 * Returns -EPERM when a reset is requested without CAP_SYS_ADMIN, the
 * memdup_user() error when the argument cannot be copied in, the result of
 * btrfs_get_dev_stats() otherwise, or -EFAULT when the successful result
 * cannot be copied back to user space.
 */
static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root,
				      void __user *arg, int reset_after_read)
{
	struct btrfs_ioctl_get_dev_stats *sa;
	int ret;

	if (reset_after_read && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa))
		return PTR_ERR(sa);

	ret = btrfs_get_dev_stats(root, sa, reset_after_read);

	/*
	 * Only hand the buffer back when the lookup succeeded.  This keeps the
	 * original error code from being replaced by -EFAULT and avoids
	 * writing a result the caller should not trust.
	 */
	if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa)))
		ret = -EFAULT;

	kfree(sa);
	return ret;
}
static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
{
int ret = 0;
@ -3212,8 +3238,9 @@ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
}
}
static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
static long btrfs_ioctl_balance(struct file *file, void __user *arg)
{
struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ioctl_balance_args *bargs;
struct btrfs_balance_control *bctl;
@ -3225,6 +3252,10 @@ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
if (fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
ret = mnt_want_write(file->f_path.mnt);
if (ret)
return ret;
mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);
@ -3291,6 +3322,7 @@ out_bargs:
out:
mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex);
mnt_drop_write(file->f_path.mnt);
return ret;
}
@ -3386,7 +3418,7 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_DEV_INFO:
return btrfs_ioctl_dev_info(root, argp);
case BTRFS_IOC_BALANCE:
return btrfs_ioctl_balance(root, NULL);
return btrfs_ioctl_balance(file, NULL);
case BTRFS_IOC_CLONE:
return btrfs_ioctl_clone(file, arg, 0, 0, 0);
case BTRFS_IOC_CLONE_RANGE:
@ -3419,11 +3451,15 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_SCRUB_PROGRESS:
return btrfs_ioctl_scrub_progress(root, argp);
case BTRFS_IOC_BALANCE_V2:
return btrfs_ioctl_balance(root, argp);
return btrfs_ioctl_balance(file, argp);
case BTRFS_IOC_BALANCE_CTL:
return btrfs_ioctl_balance_ctl(root, arg);
case BTRFS_IOC_BALANCE_PROGRESS:
return btrfs_ioctl_balance_progress(root, argp);
case BTRFS_IOC_GET_DEV_STATS:
return btrfs_ioctl_get_dev_stats(root, argp, 0);
case BTRFS_IOC_GET_AND_RESET_DEV_STATS:
return btrfs_ioctl_get_dev_stats(root, argp, 1);
}
return -ENOTTY;

View file

@ -266,6 +266,35 @@ struct btrfs_ioctl_logical_ino_args {
__u64 inodes;
};
/*
 * Per-device error counters reported through struct btrfs_ioctl_get_dev_stats.
 * BTRFS_DEV_STAT_VALUES_MAX is the number of counters and sizes the values[]
 * array of that structure, so new counters must be added directly before it.
 */
enum btrfs_dev_stat_values {
/* disk I/O failure stats */
BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */
BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */
BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */
/* stats for indirect indications of I/O failures */
BTRFS_DEV_STAT_CORRUPTION_ERRS, /* checksum error, bytenr error or
* contents are illegal: this is an
* indication that the block was damaged
* during read or write, or written to
* the wrong location or read from the
* wrong location */
BTRFS_DEV_STAT_GENERATION_ERRS, /* an indication that blocks have not
* been written */
BTRFS_DEV_STAT_VALUES_MAX
};
/*
 * Argument of BTRFS_IOC_GET_DEV_STATS and BTRFS_IOC_GET_AND_RESET_DEV_STATS.
 * The structure is exactly 128 __u64 slots (1 KiB): two header fields, one
 * slot per enum btrfs_dev_stat_values counter, and the rest as padding.
 */
struct btrfs_ioctl_get_dev_stats {
__u64 devid; /* in */
__u64 nr_items; /* in/out */
/* out values: */
__u64 values[BTRFS_DEV_STAT_VALUES_MAX];
/* remaining slots keep the total size fixed as counters are added */
__u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */
};
#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
@ -330,5 +359,9 @@ struct btrfs_ioctl_logical_ino_args {
struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
struct btrfs_ioctl_get_dev_stats)
#define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \
struct btrfs_ioctl_get_dev_stats)
#endif

View file

@ -196,7 +196,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
entry->len = len;
entry->disk_len = disk_len;
entry->bytes_left = len;
entry->inode = inode;
entry->inode = igrab(inode);
entry->compress_type = compress_type;
if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
set_bit(type, &entry->flags);
@ -212,12 +212,12 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
trace_btrfs_ordered_extent_add(inode, entry);
spin_lock(&tree->lock);
spin_lock_irq(&tree->lock);
node = tree_insert(&tree->tree, file_offset,
&entry->rb_node);
if (node)
ordered_data_tree_panic(inode, -EEXIST, file_offset);
spin_unlock(&tree->lock);
spin_unlock_irq(&tree->lock);
spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
list_add_tail(&entry->root_extent_list,
@ -264,9 +264,9 @@ void btrfs_add_ordered_sum(struct inode *inode,
struct btrfs_ordered_inode_tree *tree;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock(&tree->lock);
spin_lock_irq(&tree->lock);
list_add_tail(&sum->list, &entry->list);
spin_unlock(&tree->lock);
spin_unlock_irq(&tree->lock);
}
/*
@ -283,18 +283,19 @@ void btrfs_add_ordered_sum(struct inode *inode,
*/
int btrfs_dec_test_first_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 *file_offset, u64 io_size)
u64 *file_offset, u64 io_size, int uptodate)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
int ret;
unsigned long flags;
u64 dec_end;
u64 dec_start;
u64 to_dec;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock(&tree->lock);
spin_lock_irqsave(&tree->lock, flags);
node = tree_search(tree, *file_offset);
if (!node) {
ret = 1;
@ -323,6 +324,9 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
(unsigned long long)to_dec);
}
entry->bytes_left -= to_dec;
if (!uptodate)
set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
if (entry->bytes_left == 0)
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
else
@ -332,7 +336,7 @@ out:
*cached = entry;
atomic_inc(&entry->refs);
}
spin_unlock(&tree->lock);
spin_unlock_irqrestore(&tree->lock, flags);
return ret == 0;
}
@ -347,15 +351,21 @@ out:
*/
int btrfs_dec_test_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 file_offset, u64 io_size)
u64 file_offset, u64 io_size, int uptodate)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
unsigned long flags;
int ret;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock(&tree->lock);
spin_lock_irqsave(&tree->lock, flags);
if (cached && *cached) {
entry = *cached;
goto have_entry;
}
node = tree_search(tree, file_offset);
if (!node) {
ret = 1;
@ -363,6 +373,7 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
}
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
have_entry:
if (!offset_in_entry(entry, file_offset)) {
ret = 1;
goto out;
@ -374,6 +385,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
(unsigned long long)io_size);
}
entry->bytes_left -= io_size;
if (!uptodate)
set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
if (entry->bytes_left == 0)
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
else
@ -383,7 +397,7 @@ out:
*cached = entry;
atomic_inc(&entry->refs);
}
spin_unlock(&tree->lock);
spin_unlock_irqrestore(&tree->lock, flags);
return ret == 0;
}
@ -399,6 +413,8 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
trace_btrfs_ordered_extent_put(entry->inode, entry);
if (atomic_dec_and_test(&entry->refs)) {
if (entry->inode)
btrfs_add_delayed_iput(entry->inode);
while (!list_empty(&entry->list)) {
cur = entry->list.next;
sum = list_entry(cur, struct btrfs_ordered_sum, list);
@ -411,21 +427,22 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
/*
* remove an ordered extent from the tree. No references are dropped
* and you must wake_up entry->wait. You must hold the tree lock
* while you call this function.
* and waiters are woken up.
*/
static void __btrfs_remove_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry)
void btrfs_remove_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry)
{
struct btrfs_ordered_inode_tree *tree;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct rb_node *node;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock_irq(&tree->lock);
node = &entry->rb_node;
rb_erase(node, &tree->tree);
tree->last = NULL;
set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
spin_unlock_irq(&tree->lock);
spin_lock(&root->fs_info->ordered_extent_lock);
list_del_init(&entry->root_extent_list);
@ -442,21 +459,6 @@ static void __btrfs_remove_ordered_extent(struct inode *inode,
list_del_init(&BTRFS_I(inode)->ordered_operations);
}
spin_unlock(&root->fs_info->ordered_extent_lock);
}
/*
 * Take @entry out of @inode's ordered extent tree.
 *
 * The tree lock is held only around __btrfs_remove_ordered_extent(); after
 * it is released, anybody sleeping on entry->wait is woken.  No reference
 * on @entry is dropped here.
 */
void btrfs_remove_ordered_extent(struct inode *inode,
				 struct btrfs_ordered_extent *entry)
{
	struct btrfs_ordered_inode_tree *ordered = &BTRFS_I(inode)->ordered_tree;

	spin_lock(&ordered->lock);
	__btrfs_remove_ordered_extent(inode, entry);
	spin_unlock(&ordered->lock);

	wake_up(&entry->wait);
}
@ -621,19 +623,11 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
if (orig_end > INT_LIMIT(loff_t))
orig_end = INT_LIMIT(loff_t);
}
again:
/* start IO across the range first to instantiate any delalloc
* extents
*/
filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
/* The compression code will leave pages locked but return from
* writepage without setting the page writeback. Starting again
* with WB_SYNC_ALL will end up waiting for the IO to actually start.
*/
filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
filemap_fdatawait_range(inode->i_mapping, start, orig_end);
filemap_write_and_wait_range(inode->i_mapping, start, orig_end);
end = orig_end;
found = 0;
@ -657,11 +651,6 @@ again:
break;
end--;
}
if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
EXTENT_DELALLOC, 0, NULL)) {
schedule_timeout(1);
goto again;
}
}
/*
@ -676,7 +665,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry = NULL;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock(&tree->lock);
spin_lock_irq(&tree->lock);
node = tree_search(tree, file_offset);
if (!node)
goto out;
@ -687,7 +676,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
if (entry)
atomic_inc(&entry->refs);
out:
spin_unlock(&tree->lock);
spin_unlock_irq(&tree->lock);
return entry;
}
@ -703,7 +692,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
struct btrfs_ordered_extent *entry = NULL;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock(&tree->lock);
spin_lock_irq(&tree->lock);
node = tree_search(tree, file_offset);
if (!node) {
node = tree_search(tree, file_offset + len);
@ -728,7 +717,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
out:
if (entry)
atomic_inc(&entry->refs);
spin_unlock(&tree->lock);
spin_unlock_irq(&tree->lock);
return entry;
}
@ -744,7 +733,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
struct btrfs_ordered_extent *entry = NULL;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock(&tree->lock);
spin_lock_irq(&tree->lock);
node = tree_search(tree, file_offset);
if (!node)
goto out;
@ -752,7 +741,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
atomic_inc(&entry->refs);
out:
spin_unlock(&tree->lock);
spin_unlock_irq(&tree->lock);
return entry;
}
@ -764,7 +753,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered)
{
struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
u64 disk_i_size;
u64 new_i_size;
u64 i_size_test;
@ -779,7 +767,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
else
offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
spin_lock(&tree->lock);
spin_lock_irq(&tree->lock);
disk_i_size = BTRFS_I(inode)->disk_i_size;
/* truncate file */
@ -797,14 +785,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
goto out;
}
/*
* we can't update the disk_isize if there are delalloc bytes
* between disk_i_size and this ordered extent
*/
if (test_range_bit(io_tree, disk_i_size, offset - 1,
EXTENT_DELALLOC, 0, NULL)) {
goto out;
}
/*
* walk backward from this ordered extent to disk_i_size.
* if we find an ordered extent then we can't update disk i_size
@ -825,15 +805,18 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
}
node = prev;
}
while (node) {
for (; node; node = rb_prev(node)) {
test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
/* We treat this entry as if it doesn't exist */
if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
continue;
if (test->file_offset + test->len <= disk_i_size)
break;
if (test->file_offset >= i_size)
break;
if (test->file_offset >= disk_i_size)
goto out;
node = rb_prev(node);
}
new_i_size = min_t(u64, offset, i_size);
@ -851,43 +834,49 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
else
node = rb_first(&tree->tree);
}
i_size_test = 0;
if (node) {
/*
* do we have an area where IO might have finished
* between our ordered extent and the next one.
*/
/*
* We are looking for an area between our current extent and the next
* ordered extent to update the i_size to. There are 3 cases here
*
* 1) We don't actually have anything and we can update to i_size.
* 2) We have stuff but they already did their i_size update so again we
* can just update to i_size.
* 3) We have an outstanding ordered extent so the most we can update
* our disk_i_size to is the start of the next offset.
*/
i_size_test = i_size;
for (; node; node = rb_next(node)) {
test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
if (test->file_offset > offset)
if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
continue;
if (test->file_offset > offset) {
i_size_test = test->file_offset;
} else {
i_size_test = i_size;
break;
}
}
/*
* i_size_test is the end of a region after this ordered
* extent where there are no ordered extents. As long as there
* are no delalloc bytes in this area, it is safe to update
* disk_i_size to the end of the region.
* extent where there are no ordered extents, we can safely set
* disk_i_size to this.
*/
if (i_size_test > offset &&
!test_range_bit(io_tree, offset, i_size_test - 1,
EXTENT_DELALLOC, 0, NULL)) {
if (i_size_test > offset)
new_i_size = min_t(u64, i_size_test, i_size);
}
BTRFS_I(inode)->disk_i_size = new_i_size;
ret = 0;
out:
/*
* we need to remove the ordered extent with the tree lock held
* so that other people calling this function don't find our fully
* processed ordered entry and skip updating the i_size
* We need to do this because we can't remove ordered extents until
* after the i_disk_size has been updated and then the inode has been
* updated to reflect the change, so we need to tell anybody who finds
* this ordered extent that we've already done all the real work, we
* just haven't completed all the other work.
*/
if (ordered)
__btrfs_remove_ordered_extent(inode, ordered);
spin_unlock(&tree->lock);
if (ordered)
wake_up(&ordered->wait);
set_bit(BTRFS_ORDERED_UPDATED_ISIZE, &ordered->flags);
spin_unlock_irq(&tree->lock);
return ret;
}
@ -912,7 +901,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
if (!ordered)
return 1;
spin_lock(&tree->lock);
spin_lock_irq(&tree->lock);
list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
if (disk_bytenr >= ordered_sum->bytenr) {
num_sectors = ordered_sum->len / sectorsize;
@ -927,7 +916,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
}
}
out:
spin_unlock(&tree->lock);
spin_unlock_irq(&tree->lock);
btrfs_put_ordered_extent(ordered);
return ret;
}

View file

@ -74,6 +74,12 @@ struct btrfs_ordered_sum {
#define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */
#define BTRFS_ORDERED_IOERR 6 /* We had an io error when writing this out */
#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent
* has done its due diligence in updating
* the isize. */
struct btrfs_ordered_extent {
/* logical offset in the file */
u64 file_offset;
@ -113,6 +119,8 @@ struct btrfs_ordered_extent {
/* a per root list of all the pending ordered extents */
struct list_head root_extent_list;
struct btrfs_work work;
};
@ -143,10 +151,11 @@ void btrfs_remove_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry);
int btrfs_dec_test_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 file_offset, u64 io_size);
u64 file_offset, u64 io_size, int uptodate);
int btrfs_dec_test_first_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent **cached,
u64 *file_offset, u64 io_size);
u64 *file_offset, u64 io_size,
int uptodate);
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len, u64 disk_len, int type);
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,

View file

@ -294,6 +294,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
btrfs_dev_extent_chunk_offset(l, dev_extent),
(unsigned long long)
btrfs_dev_extent_length(l, dev_extent));
case BTRFS_DEV_STATS_KEY:
printk(KERN_INFO "\t\tdevice stats\n");
break;
};
}
}

View file

@ -718,13 +718,18 @@ static void reada_start_machine_worker(struct btrfs_work *work)
{
struct reada_machine_work *rmw;
struct btrfs_fs_info *fs_info;
int old_ioprio;
rmw = container_of(work, struct reada_machine_work, work);
fs_info = rmw->fs_info;
kfree(rmw);
old_ioprio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current),
task_nice_ioprio(current));
set_task_ioprio(current, BTRFS_IOPRIO_READA);
__reada_start_machine(fs_info);
set_task_ioprio(current, old_ioprio);
}
static void __reada_start_machine(struct btrfs_fs_info *fs_info)

View file

@ -50,7 +50,7 @@ struct scrub_dev;
struct scrub_page {
struct scrub_block *sblock;
struct page *page;
struct block_device *bdev;
struct btrfs_device *dev;
u64 flags; /* extent flags */
u64 generation;
u64 logical;
@ -86,6 +86,7 @@ struct scrub_block {
unsigned int header_error:1;
unsigned int checksum_error:1;
unsigned int no_io_error_seen:1;
unsigned int generation_error:1; /* also sets header_error */
};
};
@ -675,6 +676,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
sdev->stat.read_errors++;
sdev->stat.uncorrectable_errors++;
spin_unlock(&sdev->stat_lock);
btrfs_dev_stat_inc_and_print(sdev->dev,
BTRFS_DEV_STAT_READ_ERRS);
goto out;
}
@ -686,6 +689,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
sdev->stat.read_errors++;
sdev->stat.uncorrectable_errors++;
spin_unlock(&sdev->stat_lock);
btrfs_dev_stat_inc_and_print(sdev->dev,
BTRFS_DEV_STAT_READ_ERRS);
goto out;
}
BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
@ -699,6 +704,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
sdev->stat.read_errors++;
sdev->stat.uncorrectable_errors++;
spin_unlock(&sdev->stat_lock);
btrfs_dev_stat_inc_and_print(sdev->dev,
BTRFS_DEV_STAT_READ_ERRS);
goto out;
}
@ -725,12 +732,16 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
spin_unlock(&sdev->stat_lock);
if (__ratelimit(&_rs))
scrub_print_warning("i/o error", sblock_to_check);
btrfs_dev_stat_inc_and_print(sdev->dev,
BTRFS_DEV_STAT_READ_ERRS);
} else if (sblock_bad->checksum_error) {
spin_lock(&sdev->stat_lock);
sdev->stat.csum_errors++;
spin_unlock(&sdev->stat_lock);
if (__ratelimit(&_rs))
scrub_print_warning("checksum error", sblock_to_check);
btrfs_dev_stat_inc_and_print(sdev->dev,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
} else if (sblock_bad->header_error) {
spin_lock(&sdev->stat_lock);
sdev->stat.verify_errors++;
@ -738,6 +749,12 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
if (__ratelimit(&_rs))
scrub_print_warning("checksum/header error",
sblock_to_check);
if (sblock_bad->generation_error)
btrfs_dev_stat_inc_and_print(sdev->dev,
BTRFS_DEV_STAT_GENERATION_ERRS);
else
btrfs_dev_stat_inc_and_print(sdev->dev,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
}
if (sdev->readonly)
@ -998,8 +1015,8 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev,
page = sblock->pagev + page_index;
page->logical = logical;
page->physical = bbio->stripes[mirror_index].physical;
/* for missing devices, bdev is NULL */
page->bdev = bbio->stripes[mirror_index].dev->bdev;
/* for missing devices, dev->bdev is NULL */
page->dev = bbio->stripes[mirror_index].dev;
page->mirror_num = mirror_index + 1;
page->page = alloc_page(GFP_NOFS);
if (!page->page) {
@ -1043,7 +1060,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
struct scrub_page *page = sblock->pagev + page_num;
DECLARE_COMPLETION_ONSTACK(complete);
if (page->bdev == NULL) {
if (page->dev->bdev == NULL) {
page->io_error = 1;
sblock->no_io_error_seen = 0;
continue;
@ -1053,7 +1070,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
bio = bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
bio->bi_bdev = page->bdev;
bio->bi_bdev = page->dev->bdev;
bio->bi_sector = page->physical >> 9;
bio->bi_end_io = scrub_complete_bio_end_io;
bio->bi_private = &complete;
@ -1102,11 +1119,14 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
h = (struct btrfs_header *)mapped_buffer;
if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) ||
generation != le64_to_cpu(h->generation) ||
memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
BTRFS_UUID_SIZE))
BTRFS_UUID_SIZE)) {
sblock->header_error = 1;
} else if (generation != le64_to_cpu(h->generation)) {
sblock->header_error = 1;
sblock->generation_error = 1;
}
csum = h->csum;
} else {
if (!have_csum)
@ -1182,7 +1202,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
bio = bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
bio->bi_bdev = page_bad->bdev;
bio->bi_bdev = page_bad->dev->bdev;
bio->bi_sector = page_bad->physical >> 9;
bio->bi_end_io = scrub_complete_bio_end_io;
bio->bi_private = &complete;
@ -1196,6 +1216,12 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
/* this will also unplug the queue */
wait_for_completion(&complete);
if (!bio_flagged(bio, BIO_UPTODATE)) {
btrfs_dev_stat_inc_and_print(page_bad->dev,
BTRFS_DEV_STAT_WRITE_ERRS);
bio_put(bio);
return -EIO;
}
bio_put(bio);
}
@ -1352,7 +1378,8 @@ static int scrub_checksum_super(struct scrub_block *sblock)
u64 mapped_size;
void *p;
u32 crc = ~(u32)0;
int fail = 0;
int fail_gen = 0;
int fail_cor = 0;
u64 len;
int index;
@ -1363,13 +1390,13 @@ static int scrub_checksum_super(struct scrub_block *sblock)
memcpy(on_disk_csum, s->csum, sdev->csum_size);
if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr))
++fail;
++fail_cor;
if (sblock->pagev[0].generation != le64_to_cpu(s->generation))
++fail;
++fail_gen;
if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
++fail;
++fail_cor;
len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
@ -1394,9 +1421,9 @@ static int scrub_checksum_super(struct scrub_block *sblock)
btrfs_csum_final(crc, calculated_csum);
if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
++fail;
++fail_cor;
if (fail) {
if (fail_cor + fail_gen) {
/*
* if we find an error in a super block, we just report it.
* They will get written with the next transaction commit
@ -1405,9 +1432,15 @@ static int scrub_checksum_super(struct scrub_block *sblock)
spin_lock(&sdev->stat_lock);
++sdev->stat.super_errors;
spin_unlock(&sdev->stat_lock);
if (fail_cor)
btrfs_dev_stat_inc_and_print(sdev->dev,
BTRFS_DEV_STAT_CORRUPTION_ERRS);
else
btrfs_dev_stat_inc_and_print(sdev->dev,
BTRFS_DEV_STAT_GENERATION_ERRS);
}
return fail;
return fail_cor + fail_gen;
}
static void scrub_block_get(struct scrub_block *sblock)
@ -1551,7 +1584,7 @@ static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
return -ENOMEM;
}
spage->sblock = sblock;
spage->bdev = sdev->dev->bdev;
spage->dev = sdev->dev;
spage->flags = flags;
spage->generation = gen;
spage->logical = logical;

View file

@ -188,7 +188,8 @@ void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
va_start(args, fmt);
if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') {
strncpy(lvl, fmt, 3);
memcpy(lvl, fmt, 3);
lvl[3] = '\0';
fmt += 3;
type = logtypes[fmt[1] - '0'];
} else
@ -435,11 +436,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_thread_pool:
intarg = 0;
match_int(&args[0], &intarg);
if (intarg) {
if (intarg)
info->thread_pool_size = intarg;
printk(KERN_INFO "btrfs: thread pool %d\n",
info->thread_pool_size);
}
break;
case Opt_max_inline:
num = match_strdup(&args[0]);
@ -769,7 +767,7 @@ static int btrfs_fill_super(struct super_block *sb,
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
sb->s_flags |= MS_POSIXACL;
#endif
sb->s_flags |= MS_I_VERSION;
err = open_ctree(sb, fs_devices, (char *)data);
if (err) {
printk("btrfs: open_ctree failed\n");
@ -925,63 +923,48 @@ static inline int is_subvolume_inode(struct inode *inode)
*/
static char *setup_root_args(char *args)
{
unsigned copied = 0;
unsigned len = strlen(args) + 2;
char *pos;
char *ret;
unsigned len = strlen(args) + 2 + 1;
char *src, *dst, *buf;
/*
* We need the same args as before, but minus
* We need the same args as before, but with this substitution:
* s!subvol=[^,]+!subvolid=0!
*
* subvol=a
*
* and add
*
* subvolid=0
*
* which is a difference of 2 characters, so we allocate strlen(args) +
* 2 characters.
* Since the replacement string is up to 2 bytes longer than the
* original, allocate strlen(args) + 2 + 1 bytes.
*/
ret = kzalloc(len * sizeof(char), GFP_NOFS);
if (!ret)
return NULL;
pos = strstr(args, "subvol=");
src = strstr(args, "subvol=");
/* This shouldn't happen, but just in case.. */
if (!pos) {
kfree(ret);
if (!src)
return NULL;
buf = dst = kmalloc(len, GFP_NOFS);
if (!buf)
return NULL;
}
/*
* The subvol=<> arg is not at the front of the string, copy everybody
* up to that into ret.
* If the subvol= arg is not at the start of the string,
* copy whatever precedes it into buf.
*/
if (pos != args) {
*pos = '\0';
strcpy(ret, args);
copied += strlen(args);
pos++;
if (src != args) {
*src++ = '\0';
strcpy(buf, args);
dst += strlen(args);
}
strncpy(ret + copied, "subvolid=0", len - copied);
/* Length of subvolid=0 */
copied += 10;
strcpy(dst, "subvolid=0");
dst += strlen("subvolid=0");
/*
* If there is no , after the subvol= option then we know there's no
* other options and we can just return.
* If there is a "," after the original subvol=... string,
* copy that suffix into our buffer. Otherwise, we're done.
*/
pos = strchr(pos, ',');
if (!pos)
return ret;
src = strchr(src, ',');
if (src)
strcpy(dst, src);
/* Copy the rest of the arguments into our buffer */
strncpy(ret + copied, pos, len - copied);
copied += strlen(pos);
return ret;
return buf;
}
static struct dentry *mount_subvol(const char *subvol_name, int flags,
@ -1118,6 +1101,40 @@ error_fs_info:
return ERR_PTR(error);
}
/*
 * Store @new_limit into workers->max_workers.  The update is made while
 * holding workers->lock, taken with spin_lock_irq().
 */
static void btrfs_set_max_workers(struct btrfs_workers *workers, int new_limit)
{
spin_lock_irq(&workers->lock);
workers->max_workers = new_limit;
spin_unlock_irq(&workers->lock);
}
/*
 * Switch the filesystem from @old_pool_size to @new_pool_size worker threads.
 *
 * Nothing is done when both sizes are equal.  Otherwise
 * fs_info->thread_pool_size is updated, the change is logged, and
 * @new_pool_size is passed to btrfs_set_max_workers() for every worker pool
 * in the table below, in table order.
 */
static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
				     int new_pool_size, int old_pool_size)
{
	struct btrfs_workers *pools[] = {
		&fs_info->generic_worker,
		&fs_info->workers,
		&fs_info->delalloc_workers,
		&fs_info->submit_workers,
		&fs_info->caching_workers,
		&fs_info->fixup_workers,
		&fs_info->endio_workers,
		&fs_info->endio_meta_workers,
		&fs_info->endio_meta_write_workers,
		&fs_info->endio_write_workers,
		&fs_info->endio_freespace_worker,
		&fs_info->delayed_workers,
		&fs_info->readahead_workers,
		&fs_info->scrub_workers,
	};
	unsigned int idx;

	if (new_pool_size == old_pool_size)
		return;

	fs_info->thread_pool_size = new_pool_size;

	printk(KERN_INFO "btrfs: resize thread pool %d -> %d\n",
	       old_pool_size, new_pool_size);

	for (idx = 0; idx < sizeof(pools) / sizeof(pools[0]); idx++)
		btrfs_set_max_workers(pools[idx], new_pool_size);
}
static int btrfs_remount(struct super_block *sb, int *flags, char *data)
{
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
@ -1137,6 +1154,9 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
goto restore;
}
btrfs_resize_thread_pool(fs_info,
fs_info->thread_pool_size, old_thread_pool_size);
if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
return 0;
@ -1180,7 +1200,8 @@ restore:
fs_info->compress_type = old_compress_type;
fs_info->max_inline = old_max_inline;
fs_info->alloc_start = old_alloc_start;
fs_info->thread_pool_size = old_thread_pool_size;
btrfs_resize_thread_pool(fs_info,
old_thread_pool_size, fs_info->thread_pool_size);
fs_info->metadata_ratio = old_metadata_ratio;
return ret;
}

View file

@ -28,6 +28,7 @@
#include "locking.h"
#include "tree-log.h"
#include "inode-map.h"
#include "volumes.h"
#define BTRFS_ROOT_TRANS_TAG 0
@ -55,48 +56,49 @@ static noinline void switch_commit_root(struct btrfs_root *root)
static noinline int join_transaction(struct btrfs_root *root, int nofail)
{
struct btrfs_transaction *cur_trans;
struct btrfs_fs_info *fs_info = root->fs_info;
spin_lock(&root->fs_info->trans_lock);
spin_lock(&fs_info->trans_lock);
loop:
/* The file system has been taken offline. No new transactions. */
if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
spin_unlock(&root->fs_info->trans_lock);
if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
spin_unlock(&fs_info->trans_lock);
return -EROFS;
}
if (root->fs_info->trans_no_join) {
if (fs_info->trans_no_join) {
if (!nofail) {
spin_unlock(&root->fs_info->trans_lock);
spin_unlock(&fs_info->trans_lock);
return -EBUSY;
}
}
cur_trans = root->fs_info->running_transaction;
cur_trans = fs_info->running_transaction;
if (cur_trans) {
if (cur_trans->aborted) {
spin_unlock(&root->fs_info->trans_lock);
spin_unlock(&fs_info->trans_lock);
return cur_trans->aborted;
}
atomic_inc(&cur_trans->use_count);
atomic_inc(&cur_trans->num_writers);
cur_trans->num_joined++;
spin_unlock(&root->fs_info->trans_lock);
spin_unlock(&fs_info->trans_lock);
return 0;
}
spin_unlock(&root->fs_info->trans_lock);
spin_unlock(&fs_info->trans_lock);
cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
if (!cur_trans)
return -ENOMEM;
spin_lock(&root->fs_info->trans_lock);
if (root->fs_info->running_transaction) {
spin_lock(&fs_info->trans_lock);
if (fs_info->running_transaction) {
/*
* someone started a transaction after we unlocked. Make sure
* to redo the trans_no_join checks above
*/
kmem_cache_free(btrfs_transaction_cachep, cur_trans);
cur_trans = root->fs_info->running_transaction;
cur_trans = fs_info->running_transaction;
goto loop;
}
@ -121,20 +123,38 @@ loop:
cur_trans->delayed_refs.flushing = 0;
cur_trans->delayed_refs.run_delayed_start = 0;
cur_trans->delayed_refs.seq = 1;
/*
* although the tree mod log is per file system and not per transaction,
* the log must never go across transaction boundaries.
*/
smp_mb();
if (!list_empty(&fs_info->tree_mod_seq_list)) {
printk(KERN_ERR "btrfs: tree_mod_seq_list not empty when "
"creating a fresh transaction\n");
WARN_ON(1);
}
if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) {
printk(KERN_ERR "btrfs: tree_mod_log rb tree not empty when "
"creating a fresh transaction\n");
WARN_ON(1);
}
atomic_set(&fs_info->tree_mod_seq, 0);
init_waitqueue_head(&cur_trans->delayed_refs.seq_wait);
spin_lock_init(&cur_trans->commit_lock);
spin_lock_init(&cur_trans->delayed_refs.lock);
INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head);
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(&cur_trans->dirty_pages,
root->fs_info->btree_inode->i_mapping);
root->fs_info->generation++;
cur_trans->transid = root->fs_info->generation;
root->fs_info->running_transaction = cur_trans;
fs_info->btree_inode->i_mapping);
fs_info->generation++;
cur_trans->transid = fs_info->generation;
fs_info->running_transaction = cur_trans;
cur_trans->aborted = 0;
spin_unlock(&root->fs_info->trans_lock);
spin_unlock(&fs_info->trans_lock);
return 0;
}
@ -758,6 +778,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
if (ret)
return ret;
ret = btrfs_run_dev_stats(trans, root->fs_info);
BUG_ON(ret);
while (!list_empty(&fs_info->dirty_cowonly_roots)) {
next = fs_info->dirty_cowonly_roots.next;
list_del_init(next);

View file

@ -1628,7 +1628,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
int i;
int ret;
btrfs_read_buffer(eb, gen);
ret = btrfs_read_buffer(eb, gen);
if (ret)
return ret;
level = btrfs_header_level(eb);
@ -1749,7 +1751,11 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
path->slots[*level]++;
if (wc->free) {
btrfs_read_buffer(next, ptr_gen);
ret = btrfs_read_buffer(next, ptr_gen);
if (ret) {
free_extent_buffer(next);
return ret;
}
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
@ -1766,7 +1772,11 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
free_extent_buffer(next);
continue;
}
btrfs_read_buffer(next, ptr_gen);
ret = btrfs_read_buffer(next, ptr_gen);
if (ret) {
free_extent_buffer(next);
return ret;
}
WARN_ON(*level <= 0);
if (path->nodes[*level-1])
@ -2657,6 +2667,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
}
btrfs_release_path(path);
if (ret > 0)
ret = 0;
return ret;
}
@ -3028,21 +3040,6 @@ out:
return ret;
}
/*
 * Returns 1 when the inode's logged_trans equals trans->transid and its
 * last_sub_trans is not greater than the root's last_log_commit; returns 0
 * otherwise.  Both fields are compared while holding root->log_mutex.
 */
static int inode_in_log(struct btrfs_trans_handle *trans,
			struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int logged;

	mutex_lock(&root->log_mutex);
	logged = BTRFS_I(inode)->logged_trans == trans->transid &&
		 BTRFS_I(inode)->last_sub_trans <= root->last_log_commit;
	mutex_unlock(&root->log_mutex);

	return logged;
}
/*
* helper function around btrfs_log_inode to make sure newly created
* parent directories also end up in the log. A minimal inode and backref
@ -3083,7 +3080,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (ret)
goto end_no_trans;
if (inode_in_log(trans, inode)) {
if (btrfs_inode_in_log(inode, trans->transid)) {
ret = BTRFS_NO_LOG_SYNC;
goto end_no_trans;
}

View file

@ -23,9 +23,9 @@
*
* ulist = ulist_alloc();
* ulist_add(ulist, root);
* elem = NULL;
* ULIST_ITER_INIT(&uiter);
*
* while ((elem = ulist_next(ulist, elem)) {
* while ((elem = ulist_next(ulist, &uiter)) {
* for (all child nodes n in elem)
* ulist_add(ulist, n);
* do something useful with the node;
@ -95,7 +95,7 @@ EXPORT_SYMBOL(ulist_reinit);
*
* The allocated ulist will be returned in an initialized state.
*/
struct ulist *ulist_alloc(unsigned long gfp_mask)
struct ulist *ulist_alloc(gfp_t gfp_mask)
{
struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask);
@ -144,13 +144,22 @@ EXPORT_SYMBOL(ulist_free);
* unaltered.
*/
int ulist_add(struct ulist *ulist, u64 val, unsigned long aux,
unsigned long gfp_mask)
gfp_t gfp_mask)
{
return ulist_add_merge(ulist, val, aux, NULL, gfp_mask);
}
int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux,
unsigned long *old_aux, gfp_t gfp_mask)
{
int i;
for (i = 0; i < ulist->nnodes; ++i) {
if (ulist->nodes[i].val == val)
if (ulist->nodes[i].val == val) {
if (old_aux)
*old_aux = ulist->nodes[i].aux;
return 0;
}
}
if (ulist->nnodes >= ulist->nodes_alloced) {
@ -188,33 +197,26 @@ EXPORT_SYMBOL(ulist_add);
/**
* ulist_next - iterate ulist
* @ulist: ulist to iterate
* @prev: previously returned element or %NULL to start iteration
* @uiter: iterator variable, initialized with ULIST_ITER_INIT(&iterator)
*
* Note: locking must be provided by the caller. In case of rwlocks only read
* locking is needed
*
* This function is used to iterate an ulist. The iteration is started with
* @prev = %NULL. It returns the next element from the ulist or %NULL when the
* This function is used to iterate an ulist.
* It returns the next element from the ulist or %NULL when the
* end is reached. No guarantee is made with respect to the order in which
* the elements are returned. They might neither be returned in order of
* addition nor in ascending order.
* It is allowed to call ulist_add during an enumeration. Newly added items
* are guaranteed to show up in the running enumeration.
*/
struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev)
struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter)
{
int next;
if (ulist->nnodes == 0)
return NULL;
if (!prev)
return &ulist->nodes[0];
next = (prev - ulist->nodes) + 1;
if (next < 0 || next >= ulist->nnodes)
if (uiter->i < 0 || uiter->i >= ulist->nnodes)
return NULL;
return &ulist->nodes[next];
return &ulist->nodes[uiter->i++];
}
EXPORT_SYMBOL(ulist_next);

View file

@ -24,6 +24,10 @@
*/
#define ULIST_SIZE 16
/*
 * iteration state for ulist_next(): i is the index of the node that the
 * next call will return. Initialize with ULIST_ITER_INIT().
 */
struct ulist_iterator {
int i;
};
/*
* element of the list
*/
@ -59,10 +63,15 @@ struct ulist {
void ulist_init(struct ulist *ulist);
void ulist_fini(struct ulist *ulist);
void ulist_reinit(struct ulist *ulist);
struct ulist *ulist_alloc(unsigned long gfp_mask);
struct ulist *ulist_alloc(gfp_t gfp_mask);
void ulist_free(struct ulist *ulist);
int ulist_add(struct ulist *ulist, u64 val, unsigned long aux,
unsigned long gfp_mask);
struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev);
gfp_t gfp_mask);
int ulist_add_merge(struct ulist *ulist, u64 val, unsigned long aux,
unsigned long *old_aux, gfp_t gfp_mask);
struct ulist_node *ulist_next(struct ulist *ulist,
struct ulist_iterator *uiter);
/* reset the iterator's index to 0, i.e. to the first node of the ulist */
#define ULIST_ITER_INIT(uiter) ((uiter)->i = 0)
#endif

View file

@ -23,6 +23,7 @@
#include <linux/random.h>
#include <linux/iocontext.h>
#include <linux/capability.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <asm/div64.h>
#include "compat.h"
@ -39,6 +40,8 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_device *device);
static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
static DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
@ -361,6 +364,7 @@ static noinline int device_list_add(const char *path,
return -ENOMEM;
}
device->devid = devid;
device->dev_stats_valid = 0;
device->work.func = pending_bios_fn;
memcpy(device->uuid, disk_super->dev_item.uuid,
BTRFS_UUID_SIZE);
@ -1633,7 +1637,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
int ret = 0;
if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding)
return -EINVAL;
return -EROFS;
bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
root->fs_info->bdev_holder);
@ -4001,13 +4005,58 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
return 0;
}
/*
 * Fold @stripe_index into the two low bits of @bi_private and return the
 * combined value.
 *
 * With single, dup, RAID0, RAID1 and RAID10, stripe_index is at most 1.
 * The alternative solution (instead of stealing bits from the pointer)
 * would be to allocate an intermediate structure that contains the old
 * private pointer plus the stripe_index.
 */
static void *merge_stripe_index_into_bio_private(void *bi_private,
						 unsigned int stripe_index)
{
	uintptr_t packed = (uintptr_t)bi_private;

	/* the two low pointer bits must be clear and the index must fit in them */
	BUG_ON((packed & 3) != 0);
	BUG_ON(stripe_index > 3);

	packed |= stripe_index;
	return (void *)packed;
}
/*
 * Recover the btrfs_bio pointer from a bio private value by clearing the
 * two low bits, which may carry a stripe index.
 */
static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private)
{
	const uintptr_t low_bits = 3;
	uintptr_t raw = (uintptr_t)bi_private;

	raw &= ~low_bits;
	return (struct btrfs_bio *)raw;
}
/*
 * Return the value held in the two low bits of a bio private pointer,
 * i.e. the stripe index stored there.
 */
static unsigned int extract_stripe_index_from_bio_private(void *bi_private)
{
	uintptr_t raw = (uintptr_t)bi_private;

	return (unsigned int)(raw & 3);
}
static void btrfs_end_bio(struct bio *bio, int err)
{
struct btrfs_bio *bbio = bio->bi_private;
struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private);
int is_orig_bio = 0;
if (err)
if (err) {
atomic_inc(&bbio->error);
if (err == -EIO || err == -EREMOTEIO) {
unsigned int stripe_index =
extract_stripe_index_from_bio_private(
bio->bi_private);
struct btrfs_device *dev;
BUG_ON(stripe_index >= bbio->num_stripes);
dev = bbio->stripes[stripe_index].dev;
if (bio->bi_rw & WRITE)
btrfs_dev_stat_inc(dev,
BTRFS_DEV_STAT_WRITE_ERRS);
else
btrfs_dev_stat_inc(dev,
BTRFS_DEV_STAT_READ_ERRS);
if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
btrfs_dev_stat_inc(dev,
BTRFS_DEV_STAT_FLUSH_ERRS);
btrfs_dev_stat_print_on_error(dev);
}
}
if (bio == bbio->orig_bio)
is_orig_bio = 1;
@ -4149,6 +4198,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
bio = first_bio;
}
bio->bi_private = bbio;
bio->bi_private = merge_stripe_index_into_bio_private(
bio->bi_private, (unsigned int)dev_nr);
bio->bi_end_io = btrfs_end_bio;
bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
dev = bbio->stripes[dev_nr].dev;
@ -4509,6 +4560,28 @@ int btrfs_read_sys_array(struct btrfs_root *root)
return ret;
}
/*
 * Find the device that holds mirror mirror_num of the block at logical.
 *
 * The logical address is mapped with btrfs_map_block() using a WRITE
 * mapping, and the device of stripe [mirror_num - 1] of the resulting
 * btrfs_bio is returned.  mirror_num must not be 0 (BUG_ON otherwise).
 *
 * Returns NULL when btrfs_map_block() reports an error.
 */
struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
						   u64 logical, int mirror_num)
{
	struct btrfs_bio *mapping = NULL;
	struct btrfs_device *dev;
	u64 len = 0;
	int err;

	BUG_ON(mirror_num == 0);

	err = btrfs_map_block(&root->fs_info->mapping_tree, WRITE, logical,
			      &len, &mapping, mirror_num);
	if (err) {
		/* a failed mapping must not hand back a btrfs_bio */
		BUG_ON(mapping != NULL);
		return NULL;
	}

	BUG_ON(mirror_num != mapping->mirror_num);
	dev = mapping->stripes[mirror_num - 1].dev;

	/* only the device pointer is needed; the mapping itself is freed */
	kfree(mapping);
	return dev;
}
int btrfs_read_chunk_tree(struct btrfs_root *root)
{
struct btrfs_path *path;
@ -4583,3 +4656,230 @@ error:
btrfs_free_path(path);
return ret;
}
/* Reset each of the BTRFS_DEV_STAT_VALUES_MAX counters of dev to 0. */
static void __btrfs_reset_dev_stats(struct btrfs_device *dev)
{
	int idx = 0;

	while (idx < BTRFS_DEV_STAT_VALUES_MAX) {
		btrfs_dev_stat_reset(dev, idx);
		idx++;
	}
}
/*
 * Load the stored I/O failure counters of every device on the
 * fs_devices list from the device tree.
 *
 * For each device the item keyed (0, BTRFS_DEV_STATS_KEY, devid) is
 * searched in dev_root.  When the search does not return 0, a warning is
 * printed and all counters of that device are reset.  Otherwise each
 * counter that fits inside the item is copied from disk and the
 * remaining ones are reset, so items written with fewer counters are
 * accepted.  In both cases dev_stats_valid is set afterwards.
 *
 * Returns -ENOMEM when no path can be allocated.  Otherwise returns the
 * result of the last search if it was negative, else 0.
 */
int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
{
	struct btrfs_key key;
	struct btrfs_root *dev_root = fs_info->dev_root;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct extent_buffer *eb;
	int slot;
	int ret = 0;
	struct btrfs_device *device;
	struct btrfs_path *path = NULL;
	int i;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		int item_size;
		struct btrfs_dev_stats_item *ptr;

		key.objectid = 0;
		key.type = BTRFS_DEV_STATS_KEY;
		key.offset = device->devid;
		ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
		if (ret) {
			/*
			 * Any non-zero search result (item missing or a
			 * negative error) is handled as "no entry": start
			 * from zeroed counters for this device.
			 */
			printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n",
			       device->name, (unsigned long long)device->devid);
			__btrfs_reset_dev_stats(device);
			device->dev_stats_valid = 1;
			btrfs_release_path(path);
			continue;
		}
		slot = path->slots[0];
		eb = path->nodes[0];
		item_size = btrfs_item_size_nr(eb, slot);

		ptr = btrfs_item_ptr(eb, slot,
				     struct btrfs_dev_stats_item);

		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
			/* only read counters that the on-disk item holds */
			if (item_size >= (1 + i) * sizeof(__le64))
				btrfs_dev_stat_set(device, i,
					btrfs_dev_stats_value(eb, ptr, i));
			else
				btrfs_dev_stat_reset(device, i);
		}

		device->dev_stats_valid = 1;
		btrfs_dev_stat_print_on_load(device);
		btrfs_release_path(path);
	}
	mutex_unlock(&fs_devices->device_list_mutex);

out:
	btrfs_free_path(path);
	return ret < 0 ? ret : 0;
}
/*
 * Write the current counters of device into its DEV_STATS item in
 * dev_root, keyed (0, BTRFS_DEV_STATS_KEY, devid).
 *
 * An existing item that is smaller than struct btrfs_dev_stats_item is
 * deleted and a new full-sized one is inserted; a missing item is
 * inserted as well.  All BTRFS_DEV_STAT_VALUES_MAX counters are then
 * stored and the buffer is marked dirty.
 *
 * Returns 0 when the item was written, -ENOMEM when no path could be
 * allocated, or the non-zero result of the failing search, delete or
 * insert operation.
 */
static int update_dev_stat_item(struct btrfs_trans_handle *trans,
				struct btrfs_root *dev_root,
				struct btrfs_device *device)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *eb;
	struct btrfs_dev_stats_item *ptr;
	int ret;
	int i;

	key.objectid = 0;
	key.type = BTRFS_DEV_STATS_KEY;
	key.offset = device->devid;

	path = btrfs_alloc_path();
	/* allocation failure is recoverable: report it instead of BUG() */
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
	if (ret < 0) {
		printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n",
		       ret, device->name);
		goto out;
	}

	if (ret == 0 &&
	    btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) {
		/* need to delete old one and insert a new one */
		ret = btrfs_del_item(trans, dev_root, path);
		if (ret != 0) {
			printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n",
			       device->name, ret);
			goto out;
		}
		/* fall through to the insert path below */
		ret = 1;
	}

	if (ret == 1) {
		/* need to insert a new item */
		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, dev_root, path,
					      &key, sizeof(*ptr));
		if (ret < 0) {
			printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n",
			       device->name, ret);
			goto out;
		}
	}

	eb = path->nodes[0];
	ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item);
	for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
		btrfs_set_dev_stats_value(eb, ptr, i,
					  btrfs_dev_stat_read(device, i));
	btrfs_mark_buffer_dirty(eb);

out:
	btrfs_free_path(path);
	return ret;
}
/*
 * Called from commit_transaction.  Writes all changed device stats to
 * disk.
 *
 * Walks the device list under device_list_mutex.  Devices whose stats
 * are not yet valid or not dirty are skipped; for the others the stats
 * item is updated and the dirty flag is cleared when that succeeds.
 *
 * Returns the result of the last update_dev_stat_item() call, or 0 when
 * no device needed an update.
 */
int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info)
{
	struct btrfs_fs_devices *devs = fs_info->fs_devices;
	struct btrfs_root *dev_root = fs_info->dev_root;
	struct btrfs_device *cur;
	int err = 0;

	mutex_lock(&devs->device_list_mutex);
	list_for_each_entry(cur, &devs->devices, dev_list) {
		if (cur->dev_stats_valid && cur->dev_stats_dirty) {
			err = update_dev_stat_item(trans, dev_root, cur);
			if (err == 0)
				cur->dev_stats_dirty = 0;
		}
	}
	mutex_unlock(&devs->device_list_mutex);

	return err;
}
/*
 * Increment the statistics counter of dev selected by index, then report
 * the counters of dev via btrfs_dev_stat_print_on_error().
 */
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
{
btrfs_dev_stat_inc(dev, index);
btrfs_dev_stat_print_on_error(dev);
}
/*
 * Print the write, read, flush, corruption and generation error counters
 * of dev at KERN_ERR level, rate limited.
 *
 * Nothing is printed while the stats of dev are not marked valid.
 */
void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
{
	if (dev->dev_stats_valid) {
		printk_ratelimited(KERN_ERR
			"btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
			dev->name,
			btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
			btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
			btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
			btrfs_dev_stat_read(dev,
					    BTRFS_DEV_STAT_CORRUPTION_ERRS),
			btrfs_dev_stat_read(dev,
					    BTRFS_DEV_STAT_GENERATION_ERRS));
	}
}
/*
 * Print the write, read, flush, corruption and generation error counters
 * of dev at KERN_INFO level.  Unlike btrfs_dev_stat_print_on_error() this
 * is neither rate limited nor conditional on dev_stats_valid.
 */
static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
{
printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n",
dev->name,
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
}
/*
 * Copy the I/O failure counters of the device with id stats->devid into
 * stats->values.
 *
 * At most stats->nr_items counters are copied.  When reset_after_read is
 * set, every counter of the device is reset: the copied ones atomically
 * while being read, the others directly.  On return stats->nr_items is
 * clamped to BTRFS_DEV_STAT_VALUES_MAX.
 *
 * The device lookup is done under device_list_mutex; the mutex is
 * released before the counters are accessed.
 *
 * Returns 0 on success, -ENODEV when the device is not found or its
 * stats are not yet valid.
 */
int btrfs_get_dev_stats(struct btrfs_root *root,
			struct btrfs_ioctl_get_dev_stats *stats,
			int reset_after_read)
{
	struct btrfs_fs_devices *devs = root->fs_info->fs_devices;
	struct btrfs_device *target;
	int idx;

	mutex_lock(&devs->device_list_mutex);
	target = btrfs_find_device(root, stats->devid, NULL, NULL);
	mutex_unlock(&devs->device_list_mutex);

	if (!target) {
		printk(KERN_WARNING
		       "btrfs: get dev_stats failed, device not found\n");
		return -ENODEV;
	}
	if (!target->dev_stats_valid) {
		printk(KERN_WARNING
		       "btrfs: get dev_stats failed, not yet valid\n");
		return -ENODEV;
	}

	for (idx = 0; idx < BTRFS_DEV_STAT_VALUES_MAX; idx++) {
		if (stats->nr_items > idx) {
			if (reset_after_read)
				stats->values[idx] =
					btrfs_dev_stat_read_and_reset(target,
								      idx);
			else
				stats->values[idx] =
					btrfs_dev_stat_read(target, idx);
		} else if (reset_after_read) {
			/* caller has no room for it, but still reset it */
			btrfs_dev_stat_reset(target, idx);
		}
	}

	if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX)
		stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX;
	return 0;
}

View file

@ -22,6 +22,7 @@
#include <linux/bio.h>
#include <linux/sort.h>
#include "async-thread.h"
#include "ioctl.h"
#define BTRFS_STRIPE_LEN (64 * 1024)
@ -106,6 +107,11 @@ struct btrfs_device {
struct completion flush_wait;
int nobarriers;
/* disk I/O failure stats. For detailed description refer to
* enum btrfs_dev_stat_values in ioctl.h */
int dev_stats_valid;
int dev_stats_dirty; /* counters need to be written to disk */
atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX];
};
struct btrfs_fs_devices {
@ -281,4 +287,50 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *max_avail);
struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root,
u64 logical, int mirror_num);
void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
int btrfs_get_dev_stats(struct btrfs_root *root,
struct btrfs_ioctl_get_dev_stats *stats,
int reset_after_read);
int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
/*
 * Atomically increment counter index of dev and flag the stats as
 * needing to be written to disk.
 */
static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
				      int index)
{
	atomic_inc(&dev->dev_stat_values[index]);
	dev->dev_stats_dirty = 1;
}
/* Return the current value of counter index of dev. */
static inline int btrfs_dev_stat_read(struct btrfs_device *dev,
				      int index)
{
	return atomic_read(&dev->dev_stat_values[index]);
}
/*
 * Atomically exchange counter index of dev with 0 and flag the stats as
 * needing to be written to disk.
 *
 * Returns the value the counter held before the exchange.
 */
static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev,
						int index)
{
	int previous = atomic_xchg(&dev->dev_stat_values[index], 0);

	dev->dev_stats_dirty = 1;
	return previous;
}
/*
 * Store val into counter index of dev and flag the stats as needing to
 * be written to disk.
 */
static inline void btrfs_dev_stat_set(struct btrfs_device *dev,
				      int index, unsigned long val)
{
	atomic_set(&dev->dev_stat_values[index], val);
	dev->dev_stats_dirty = 1;
}
/*
 * Set counter index of dev to 0 via btrfs_dev_stat_set(), which also
 * marks the stats dirty.
 */
static inline void btrfs_dev_stat_reset(struct btrfs_device *dev,
int index)
{
btrfs_dev_stat_set(dev, index, 0);
}
#endif

View file

@ -196,6 +196,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
if (ret)
goto out;
inode_inc_iversion(inode);
inode->i_ctime = CURRENT_TIME;
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);