Skip to content

Commit

Permalink
mds: fix false "failing to respond to cache pressure" warning
Browse files Browse the repository at this point in the history
The false warning happens in the following sequence of events:
- MDS has cache pressure, sends recall state messages to clients
- Client does not trim as many caps as MDS expected. So MDS
  does not reset session->recalled_at
- MDS no longer has cache pressure, so it stops sending recall state
  messages to clients.
- Client does not release its caps, so session->recalled_at in
  MDS remains unchanged

Signed-off-by: Yan, Zheng <zyan@redhat.com>
  • Loading branch information
ukernel committed Oct 14, 2016
1 parent a4ce1f5 commit 51c926a
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 10 deletions.
7 changes: 6 additions & 1 deletion src/mds/Beacon.cc
Expand Up @@ -385,8 +385,10 @@ void Beacon::notify_health(MDSRank const *mds)
{
set<Session*> sessions;
mds->sessionmap.get_client_session_set(sessions);

utime_t cutoff = ceph_clock_now(g_ceph_context);
cutoff -= g_conf->mds_recall_state_timeout;
utime_t last_recall = mds->mdcache->last_recall_state;

std::list<MDSHealthMetric> late_recall_metrics;
std::list<MDSHealthMetric> large_completed_requests_metrics;
Expand All @@ -396,7 +398,10 @@ void Beacon::notify_health(MDSRank const *mds)
dout(20) << "Session servicing RECALL " << session->info.inst
<< ": " << session->recalled_at << " " << session->recall_release_count
<< "/" << session->recall_count << dendl;
if (session->recalled_at < cutoff) {
if (last_recall < cutoff || session->last_recall_sent < last_recall) {
dout(20) << " no longer recall" << dendl;
session->clear_recalled_at();
} else if (session->recalled_at < cutoff) {
dout(20) << " exceeded timeout " << session->recalled_at << " vs. " << cutoff << dendl;
std::ostringstream oss;
oss << "Client " << session->get_human_name() << " failing to respond to cache pressure";
Expand Down
5 changes: 3 additions & 2 deletions src/mds/MDCache.cc
Expand Up @@ -7334,10 +7334,11 @@ void MDCache::check_memory_usage()

if (num_inodes_with_caps > g_conf->mds_cache_size) {
float ratio = (float)g_conf->mds_cache_size * .9 / (float)num_inodes_with_caps;
if (ratio < 1.0)
if (ratio < 1.0) {
last_recall_state = ceph_clock_now(g_ceph_context);
mds->server->recall_client_state(ratio);
}
}

}


Expand Down
2 changes: 2 additions & 0 deletions src/mds/MDCache.h
Expand Up @@ -693,6 +693,8 @@ class MDCache {
void trim_client_leases();
void check_memory_usage();

utime_t last_recall_state;

// shutdown
void shutdown_start();
void shutdown_check();
Expand Down
18 changes: 12 additions & 6 deletions src/mds/SessionMap.cc
Expand Up @@ -789,11 +789,8 @@ void Session::notify_cap_release(size_t n_caps)
{
if (!recalled_at.is_zero()) {
recall_release_count += n_caps;
if (recall_release_count >= recall_count) {
recalled_at = utime_t();
recall_count = 0;
recall_release_count = 0;
}
if (recall_release_count >= recall_count)
clear_recalled_at();
}
}

Expand All @@ -808,13 +805,22 @@ void Session::notify_recall_sent(int const new_limit)
if (recalled_at.is_zero()) {
// Entering recall phase, set up counters so we can later
// judge whether the client has respected the recall request
recalled_at = ceph_clock_now(g_ceph_context);
recalled_at = last_recall_sent = ceph_clock_now(g_ceph_context);
assert (new_limit < caps.size()); // Behaviour of Server::recall_client_state
recall_count = caps.size() - new_limit;
recall_release_count = 0;
} else {
last_recall_sent = ceph_clock_now(g_ceph_context);
}
}

// Reset all ephemeral recall-tracking state for this session: both
// recall counters go back to zero and both recall timestamps are set
// to a default-constructed utime_t.
void Session::clear_recalled_at()
{
  recall_release_count = 0;
  recall_count = 0;
  last_recall_sent = utime_t();
  recalled_at = utime_t();
}

void Session::set_client_metadata(map<string, string> const &meta)
{
info.client_metadata = meta;
Expand Down
4 changes: 3 additions & 1 deletion src/mds/SessionMap.h
Expand Up @@ -129,6 +129,7 @@ class Session : public RefCountedObject {

// Ephemeral state for tracking progress of capability recalls
utime_t recalled_at; // When was I asked to SESSION_RECALL?
utime_t last_recall_sent;
uint32_t recall_count; // How many caps was I asked to SESSION_RECALL?
uint32_t recall_release_count; // How many caps have I actually revoked?

Expand All @@ -148,6 +149,7 @@ class Session : public RefCountedObject {

void notify_cap_release(size_t n_caps);
void notify_recall_sent(int const new_limit);
void clear_recalled_at();

inodeno_t next_ino() {
if (info.prealloc_inos.empty())
Expand Down Expand Up @@ -315,7 +317,7 @@ class Session : public RefCountedObject {

Session() :
state(STATE_CLOSED), state_seq(0), importing_count(0),
recalled_at(), recall_count(0), recall_release_count(0),
recall_count(0), recall_release_count(0),
auth_caps(g_ceph_context),
connection(NULL), item_session_list(this),
requests(0), // member_offset passed to front() manually
Expand Down

0 comments on commit 51c926a

Please sign in to comment.