|
@@ -541,6 +541,8 @@ static void o2hb_queue_node_event(struct o2hb_node_event *event,
|
|
|
{
|
|
|
assert_spin_locked(&o2hb_live_lock);
|
|
|
|
|
|
+ BUG_ON((!node) && (type != O2HB_NODE_DOWN_CB));
|
|
|
+
|
|
|
event->hn_event_type = type;
|
|
|
event->hn_node = node;
|
|
|
event->hn_node_num = node_num;
|
|
@@ -593,14 +595,22 @@ static int o2hb_check_slot(struct o2hb_region *reg,
|
|
|
u64 cputime;
|
|
|
unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS;
|
|
|
unsigned int slot_dead_ms;
|
|
|
+ int tmp;
|
|
|
|
|
|
memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes);
|
|
|
|
|
|
- /* Is this correct? Do we assume that the node doesn't exist
|
|
|
- * if we're not configured for him? */
|
|
|
+ /*
|
|
|
+ * If a node is no longer configured but is still in the livemap, we
|
|
|
+ * may need to clear that bit from the livemap.
|
|
|
+ */
|
|
|
node = o2nm_get_node_by_num(slot->ds_node_num);
|
|
|
- if (!node)
|
|
|
- return 0;
|
|
|
+ if (!node) {
|
|
|
+ spin_lock(&o2hb_live_lock);
|
|
|
+ tmp = test_bit(slot->ds_node_num, o2hb_live_node_bitmap);
|
|
|
+ spin_unlock(&o2hb_live_lock);
|
|
|
+ if (!tmp)
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
|
|
|
if (!o2hb_verify_crc(reg, hb_block)) {
|
|
|
/* all paths from here will drop o2hb_live_lock for
|
|
@@ -717,8 +727,9 @@ fire_callbacks:
|
|
|
if (list_empty(&o2hb_live_slots[slot->ds_node_num])) {
|
|
|
clear_bit(slot->ds_node_num, o2hb_live_node_bitmap);
|
|
|
|
|
|
- o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, node,
|
|
|
- slot->ds_node_num);
|
|
|
+ /* node can be null */
|
|
|
+ o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB,
|
|
|
+ node, slot->ds_node_num);
|
|
|
|
|
|
changed = 1;
|
|
|
}
|
|
@@ -738,7 +749,8 @@ out:
|
|
|
|
|
|
o2hb_run_event_list(&event);
|
|
|
|
|
|
- o2nm_node_put(node);
|
|
|
+ if (node)
|
|
|
+ o2nm_node_put(node);
|
|
|
return changed;
|
|
|
}
|
|
|
|
|
@@ -765,6 +777,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
|
|
|
{
|
|
|
int i, ret, highest_node, change = 0;
|
|
|
unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
|
|
+ unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
|
|
|
struct o2hb_bio_wait_ctxt write_wc;
|
|
|
|
|
|
ret = o2nm_configured_node_map(configured_nodes,
|
|
@@ -774,6 +787,17 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
+ /*
|
|
|
+ * If a node is not configured but is in the livemap, we still need
|
|
|
+ * to read the slot so as to be able to remove it from the livemap.
|
|
|
+ */
|
|
|
+ o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap));
|
|
|
+ i = -1;
|
|
|
+ while ((i = find_next_bit(live_node_bitmap,
|
|
|
+ O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
|
|
|
+ set_bit(i, configured_nodes);
|
|
|
+ }
|
|
|
+
|
|
|
highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
|
|
|
if (highest_node >= O2NM_MAX_NODES) {
|
|
|
mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n");
|