- /**
- * @file lleventpoll.cpp
- * @brief Implementation of the LLEventPoll class.
- *
- * $LicenseInfo:firstyear=2006&license=viewergpl$
- *
- * Copyright (c) 2006-2018, Linden Research, Inc.
- * Copyright (c) 2019-2023, Henri Beauchamp.
- *
- * Second Life Viewer Source Code
- * The source code in this file ("Source Code") is provided by Linden Lab
- * to you under the terms of the GNU General Public License, version 2.0
- * ("GPL"), unless you have obtained a separate licensing agreement
- * ("Other License"), formally executed by you and Linden Lab. Terms of
- * the GPL can be found in doc/GPL-license.txt in this distribution, or
- * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
- *
- * There are special exceptions to the terms and conditions of the GPL as
- * it is applied to this Source Code. View the full text of the exception
- * in the file doc/FLOSS-exception.txt in this software distribution, or
- * online at
- * http://secondlifegrid.net/programs/open_source/licensing/flossexception
- *
- * By copying, modifying or distributing this software, you acknowledge
- * that you have read and understood your obligations described above,
- * and agree to abide by those obligations.
- *
- * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
- * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
- * COMPLETENESS OR PERFORMANCE.
- * $/LicenseInfo$
- */
- #include "llviewerprecompiledheaders.h"
- #include "lleventpoll.h"
- #include "llcorehttputil.h"
- #include "hbfastmap.h"
- #include "llhost.h"
- #include "llsdserialize.h"
- #include "lltrans.h"
- #include "llmessage.h"
- #include "llagent.h"
- #include "llappviewer.h"
- #include "llgridmanager.h" // For gIsInSecondLife
- #include "llstatusbar.h"
- #include "llviewercontrol.h"
- // This structure is used to store event poll replies until we can safely process
- // them in the main coroutine of the main thread. HB
- struct LLEventPollReplies
- {
- LL_INLINE LLEventPollReplies(const std::string& poll_name,
- const std::string& msg_name,
- const LLSD& message)
- : mPollName(poll_name),
- mMessageName(msg_name),
- mMessage(message)
- {
- }
- std::string mPollName;
- std::string mMessageName;
- LLSD mMessage;
- };
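- // Replies queued by LLEventPollImpl::handleMessage() from the event poll
- // coroutines; they are dispatched on the main thread by
- // LLEventPoll::dispatchMessages().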
- static std::vector<LLEventPollReplies> sReplies;
- ///////////////////////////////////////////////////////////////////////////////
- // LLEventPollImpl class
- ///////////////////////////////////////////////////////////////////////////////
- // We will wait RETRY_SECONDS + (error_count * RETRY_SECONDS_INC) before
- // retrying after an error. This means we attempt to recover relatively quickly
- // but back off giving more time to recover until we finally give up after
- // MAX_EVENT_POLL_HTTP_ERRORS attempts.
- // Half of a normal timeout.
- constexpr F32 EVENT_POLL_ERROR_RETRY_SECONDS = 15.f;
- constexpr F32 EVENT_POLL_ERROR_RETRY_SECONDS_INC = 5.f;
- // About 6 minutes in total, by the above rules.
- constexpr S32 MAX_EVENT_POLL_HTTP_ERRORS = 10;
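- // Worked example, with the above defaults left unchanged: the successive
- // waits are 15, 20, 25, ... 60 seconds for error counts 0 through 9, i.e. a
- // cumulative 375 seconds (a bit over 6 minutes) of back-off before giving
- // up, not counting the time spent in the failed requests themselves (the
- // retry count is doubled for the agent's region: see eventPollCoro()).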
- class LLEventPollImpl : public LLRefCount
- {
- protected:
- LOG_CLASS(LLEventPollImpl);
- public:
- LLEventPollImpl(U64 handle, const LLHost& sender);
- void start(const std::string& url);
- void stop();
- void setRegionName(const std::string& region_name);
- LL_INLINE bool isPollInFlight() const
- {
- return !mRequestTimer.hasExpired() &&
- // Note: take the frame rate into account, so that we do not end up
- // forever unable to TP just because events arrive as fast as our
- // frames are rendered. HB
- mRequestTimer.getElapsedTimeF32() >= mMinDelay - gFrameDT;
- }
- LL_INLINE F32 getPollAge() const
- {
- return mRequestTimer.getElapsedTimeF32();
- }
- private:
- ~LLEventPollImpl();
- void handleMessage(const LLSD& content);
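- // Coroutine body; it is static and receives an LLPointer to the instance
- // so that the latter stays alive until the coroutine exits.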
- static void eventPollCoro(std::string url,
- LLPointer<LLEventPollImpl> impl);
- private:
- LLCore::HttpRequest::policy_t mHttpPolicy;
- LLCore::HttpOptions::ptr_t mHttpOptions;
- LLCore::HttpHeaders::ptr_t mHttpHeaders;
- LLCoreHttpUtil::HttpCoroutineAdapter::wptr_t mAdapter;
- U64 mHandle;
- U32 mPollId;
- U32 mRequestTimeout;
- // This is the delay needed between the launch of a request and the moment
- // it can reliably receive server messages; messages arriving within this
- // delay could potentially get lost. HB
- F32 mMinDelay;
- std::string mSenderIP;
- std::string mPollURL;
- std::string mPollName;
- LLTimer mRequestTimer;
- bool mDone;
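- // Last "ack" id received for each region (keyed by region handle), kept
- // across LLEventPollImpl instances so that a new poll for the same region
- // can resume where the previous one left off.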
- static fast_hmap<U64, LLSD> sLastAck;
- static U32 sNextID;
- };
- fast_hmap<U64, LLSD> LLEventPollImpl::sLastAck;
- U32 LLEventPollImpl::sNextID = 1;
- LLEventPollImpl::LLEventPollImpl(U64 handle, const LLHost& sender)
- : mDone(false),
- mMinDelay(LLEventPoll::getMargin()),
- mPollId(sNextID++),
- mHandle(handle),
- mSenderIP(sender.getIPandPort()),
- // NOTE: by using these instead of omitting the corresponding
- // postAndSuspend() parameters, we avoid seeing such classes constructed
- // and destroyed on each loop iteration... HB
- mHttpOptions(new LLCore::HttpOptions),
- mHttpHeaders(new LLCore::HttpHeaders)
- {
- LLAppCoreHttp& app_core_http = gAppViewerp->getAppCoreHttp();
- mHttpPolicy = app_core_http.getPolicy(LLAppCoreHttp::AP_LONG_POLL);
- // The region name is unknown when the event poll instance is created: it
- // is filled in later via calls to LLEventPoll::setRegionName() made by
- // LLViewerRegion. HB
- mPollName = llformat("Event poll <%d> - Sender IP: %s - ", mPollId,
- mSenderIP.c_str());
- llinfos << mPollName << "Initialized." << llendl;
- #if LL_WINDOWS
- static const bool under_wine = gAppViewerp->isRunningUnderWine();
- // When running under Wine, touching the retries and timeouts causes HTTP
- // failures (another Wine bug, obviously), so do not do it then... HB
- if (under_wine)
- {
- llwarns_once << "Running under Wine: cannot set the event poll retries and timeouts."
- << llendl;
- return;
- }
- #endif
- // Do not retry requests at libcurl level: we want to see the requests
- // timing out here, when they do.
- mHttpOptions->setRetries(0);
- // In SL, we prefer to time out viewer-side (in libcurl) before the server
- // sends us a bogus HTTP error (a 502 error report HTML page disguised
- // with a 499 or 500 error code in its header) on its own timeout (set to
- // 30s on SL servers). For OpenSim, we let the server time out on us by
- // default (a 502 error will then be received). The user may however decide
- // to change the default timeout via the corresponding debug setting. HB
- static const char* sl = "EventPollTimeoutForSL";
- static const char* os = "EventPollTimeoutForOS";
- mRequestTimeout = llclamp(gSavedSettings.getU32(gIsInSecondLife ? sl : os),
- 15U, 180U);
- mHttpOptions->setTimeout(mRequestTimeout);
- mHttpOptions->setTransferTimeout(mRequestTimeout);
- }
- LLEventPollImpl::~LLEventPollImpl()
- {
- mHttpOptions.reset();
- mHttpHeaders.reset();
- LL_DEBUGS("EventPoll") << mPollName << "Destroyed." << LL_ENDL;
- }
- void LLEventPollImpl::setRegionName(const std::string& region_name)
- {
- if (mPollName.find(region_name) == std::string::npos) // Do not spam.
- {
- llinfos << mPollName << "Got region name: " << region_name << llendl;
- mPollName = llformat("Event poll <%d> - Region: %s - ", mPollId,
- region_name.c_str());
- }
- }
- void LLEventPollImpl::start(const std::string& url)
- {
- mPollURL = url;
- if (url.empty())
- {
- return;
- }
- llinfos << "Starting event poll <" << mPollId << "> - Sender IP: "
- << mSenderIP << " - URL: " << mPollURL << llendl;
- std::string coroname =
- gCoros.launch("LLEventPollImpl::eventPollCoro",
- boost::bind(&LLEventPollImpl::eventPollCoro, url, this));
- LL_DEBUGS("EventPoll") << mPollName << "Coroutine name: " << coroname
- << LL_ENDL;
- }
- void LLEventPollImpl::stop()
- {
- mDone = true;
- LLCoreHttpUtil::HttpCoroutineAdapter::ptr_t adapterp = mAdapter.lock();
- if (adapterp)
- {
- llinfos << mPollName << "Cancelling..." << llendl;
- // Cancel the yielding operation if any.
- adapterp->cancelSuspendedOperation();
- }
- else
- {
- LL_DEBUGS("EventPoll") << mPollName
- << "Already stopped, no action taken."
- << LL_ENDL;
- }
- }
- void LLEventPollImpl::handleMessage(const LLSD& content)
- {
- std::string msg_name = content["message"];
- LLSD message;
- message["sender"] = mSenderIP;
- if (content.has("body"))
- {
- message["body"] = content["body"];
- LL_DEBUGS("EventPoll") << mPollName << "Queuing message: " << msg_name
- << LL_ENDL;
- }
- else
- {
- llwarns << mPollName << "Message '" << msg_name << "' without a body."
- << llendl;
- }
- // Note: coroutines calling handleMessage() all belong to the main thread,
- // so we do not need a mutex before touching sReplies; should this ever
- // change, a mutex lock would be needed here. HB
- sReplies.emplace_back(mPollName, msg_name, message);
- }
- //static
- void LLEventPollImpl::eventPollCoro(std::string url,
- LLPointer<LLEventPollImpl> impl)
- {
- // Hold an LLPointer to our impl on the coroutine stack, so as to avoid
- // the impl getting destroyed before the coroutine exits. HB
- LLPointer<LLEventPollImpl> self = impl;
- LLCoreHttpUtil::HttpCoroutineAdapter::ptr_t
- adapter(new LLCoreHttpUtil::HttpCoroutineAdapter("EventPoller",
- self->mHttpPolicy));
- self->mAdapter = adapter;
- LL_DEBUGS("EventPoll") << self->mPollName << "Entering coroutine."
- << LL_ENDL;
- // This delay determines a window for TP requests to be sent to the server:
- // we avoid sending one when the current poll request is about to expire,
- // so as to avoid a race condition between the servers (sim server, Apache
- // server) and the viewer, where the TeleportFinish message could get lost
- // during the HTTP request tear-down and restart. HB
- const F32 expiry = F32(self->mRequestTimeout) - LLEventPoll::getMargin();
- LLSD acknowledge;
- // Get the last "ack" we used in previous LLEventPollImpl instances for
- // this region, if any. HB
- fast_hmap<U64, LLSD>::const_iterator it = sLastAck.find(self->mHandle);
- if (it != sLastAck.end())
- {
- acknowledge = it->second;
- }
- // Continually poll for a server update until we have been terminated
- S32 error_count = 0;
- while (!self->mDone && !gDisconnected)
- {
- LLSD request;
- request["ack"] = acknowledge;
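- // Note: "done" stays false here; in the event queue protocol, a request
- // with "done" set to true would ask the server to tear down its queue,
- // but this implementation simply stops polling instead.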
- request["done"] = false;
- LL_DEBUGS("EventPoll") << self->mPollName << "Posting and yielding."
- << LL_ENDL;
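- // The timer is (re)armed here so that isPollInFlight() can report whether
- // a request is currently established and not about to expire.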
- self->mRequestTimer.reset();
- self->mRequestTimer.setTimerExpirySec(expiry);
- LLSD result = adapter->postAndSuspend(url, request, self->mHttpOptions,
- self->mHttpHeaders);
- // Note: resetting the timer flags it as "expired", which we want to
- // ensure so that isPollInFlight() returns false at this point. HB
- F32 request_time = self->mRequestTimer.getElapsedTimeAndResetF32();
- // If this request is faster than our preset "min delay for an
- // established connection", then the latter is obviously too large, and
- // needs to be reduced. HB
- if (request_time < self->mMinDelay)
- {
- self->mMinDelay = request_time;
- LL_DEBUGS("EventPoll") << self->mPollName
- << "Minimum delay for established connection reduced to: "
- << request_time << LL_ENDL;
- }
- if (gDisconnected)
- {
- llinfos << self->mPollName
- << "Viewer disconnected. Dropping stale event message."
- << llendl;
- break;
- }
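- // Whether this poll belongs to the agent's current region: errors on it
- // are treated more leniently (more retries, 404s are not fatal) and,
- // ultimately, force a disconnection from the grid instead of the poll
- // being silently given up on.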
- bool is_agent_region = gAgent.getRegionHandle() == self->mHandle;
- LLCore::HttpStatus status =
- LLCoreHttpUtil::HttpCoroutineAdapter::getStatusFromLLSD(result);
- if (!status)
- {
- if (status == gStatusTimeout)
- {
- // A standard timeout response: we get this when there are no
- // events.
- LL_DEBUGS("EventPoll") << self->mPollName
- << "Request timed out viewer-side after: "
- << request_time << "s." << LL_ENDL;
- error_count = 0;
- continue;
- }
- // Log details when debugging for all other types of errors. HB
- LL_DEBUGS("EventPoll") << self->mPollName
- << "Error received after: "
- << request_time << "s."
- << " - Error " << status.toTerseString()
- << ": " << status.getMessage();
- const LLSD& http_results =
- result[LLCoreHttpUtil::HttpCoroutineAdapter::HTTP_RESULTS];
- if (http_results.has("error_body"))
- {
- std::string body = http_results["error_body"].asString();
- LL_CONT << " - Returned body:\n" << body;
- }
- LL_CONT << LL_ENDL;
- // When the server times out (because there was no event to report),
- // a 502 error is seen on OpenSim grids, and should also be seen in
- // SL, but there it somehow gets "mutated" (its header is changed,
- // but not its "502 error" body) into a 499 or 500 error. Treat these
- // as a server-side timeout and restart. HB
- if (status == gStatusBadGateway ||
- (gIsInSecondLife &&
- (status == gStatusInternalError ||
- status == gStatusServerInternalError)))
- {
- LL_DEBUGS("EventPoll") << "Error ignored and treated as server-side timeout."
- << LL_ENDL;
- error_count = 0;
- continue;
- }
- if (status == gStatusCancelled)
- {
- // Event polling for this server has been cancelled.
- llinfos << self->mPollName << "Cancelled." << llendl;
- break;
- }
- if (status == gStatusNotFound)
- {
- // Do not give up on 404 if this is the agent region ! HB
- if (!is_agent_region)
- {
- // In some cases the server gets ahead of the viewer and
- // will return a 404 error (not found) before the cancel
- // event comes back in the queue.
- llinfos << self->mPollName << "Cancelled on 404."
- << llendl;
- break;
- }
- }
- else if (!status.isHttpStatus())
- {
- // Some libcurl error (other than gStatusTimeout) or LLCore
- // error (other than gStatusCancelled) was returned. This is
- // unlikely to be recoverable...
- llwarns << self->mPollName
- << "Critical error returned from libraries. Cancelling coroutine."
- << llendl;
- break;
- }
- S32 max_retries = MAX_EVENT_POLL_HTTP_ERRORS;
- if (is_agent_region)
- {
- // Increase the number of allowed retries for the agent region:
- // there may be a temporary network issue, and we do not want
- // the viewer to give up too soon on the agent's region, since
- // it would cause a disconnection from the grid (see below). HB
- max_retries *= 2;
- llwarns << self->mPollName
- << "Agent's region poll request error: "
- << status.toTerseString() << ": "
- << status.getMessage() << llendl;
- if (gStatusBarp)
- {
- gStatusBarp->incFailedEventPolls();
- }
- }
- if (error_count < max_retries)
- {
- // An unanticipated error has been received from our poll
- // request. Calculate a timeout and wait for it to expire
- // (sleep) before trying again. The sleep time is increased by
- // EVENT_POLL_ERROR_RETRY_SECONDS_INC seconds for each
- // consecutive error until MAX_EVENT_POLL_HTTP_ERRORS is
- // reached.
- F32 wait = EVENT_POLL_ERROR_RETRY_SECONDS +
- error_count * EVENT_POLL_ERROR_RETRY_SECONDS_INC;
- llwarns << self->mPollName << "Retrying in " << wait
- << " seconds; error count is now " << ++error_count
- << llendl;
- llcoro::suspendUntilTimeout(wait);
- LL_DEBUGS("EventPoll") << self->mPollName
- << "About to retry request." << LL_ENDL;
- continue;
- }
- // At this point we have given up and the viewer will not receive
- // HTTP messages from the simulator. IMs, teleports, about land,
- // selecting land, region crossings and more will all fail. The
- // viewer is essentially disconnected from the region even though
- // some things may still work. Since things will not get better
- // until the user relogs, we force a disconnect now.
- if (is_agent_region)
- {
- llwarns << self->mPollName
- << "Forcing disconnect due to stalled agent region event poll."
- << llendl;
- gAppViewerp->forceDisconnect(LLTrans::getString("AgentLostConnection"));
- }
- else
- {
- llwarns << self->mPollName
- << "Stalled region event poll. Giving up." << llendl;
- }
- self->mDone = true;
- break;
- }
- else if (is_agent_region && gStatusBarp)
- {
- gStatusBarp->resetFailedEventPolls();
- }
- error_count = 0;
- if (!result.isMap() || !result.has("events") ||
- !result["events"].isArray() || !result.has("id"))
- {
- llwarns << self->mPollName
- << "Received a reply without 'events' or 'id' key: "
- << LLSDXMLStreamer(result) << llendl;
- continue;
- }
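- // The "id" of this reply becomes the "ack" sent with the next request;
- // remember it per region handle so that a future poll instance for this
- // region can resume from the same point.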
- acknowledge = result["id"];
- if (acknowledge.isUndefined())
- {
- LL_DEBUGS("EventPoll") << self->mPollName
- << "Got reply with undefined 'id' key."
- << LL_ENDL;
- sLastAck.erase(self->mHandle);
- }
- else
- {
- sLastAck[self->mHandle] = acknowledge;
- }
- const LLSD& events = result["events"];
- LL_DEBUGS("EventPoll") << self->mPollName << "Got "
- << events.size() << " event(s):\n"
- << LLSDXMLStreamer(acknowledge) << LL_ENDL;
- for (LLSD::array_const_iterator it = events.beginArray(),
- end = events.endArray();
- it != end; ++it)
- {
- if (it->has("message"))
- {
- self->handleMessage(*it);
- }
- }
- }
- LL_DEBUGS("EventPoll") << self->mPollName << "Leaving coroutine."
- << LL_ENDL;
- }
- ///////////////////////////////////////////////////////////////////////////////
- // LLEventPoll class proper
- ///////////////////////////////////////////////////////////////////////////////
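- // Typical usage (hypothetical sketch; the actual call sites live in
- // LLViewerRegion, and the names used below are illustrative only):
- //   LLEventPoll* poll = new LLEventPoll(region_handle, sim_host, cap_url);
- //   poll->setRegionName(region_name);
- //   ...
- //   delete poll; // Cancels the poll; the impl dies when its coroutine exits.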
- LLEventPoll::LLEventPoll(U64 handle, const LLHost& sender,
- const std::string& poll_url)
- : mImpl(new LLEventPollImpl(handle, sender))
- {
- mImpl->start(poll_url);
- }
- LLEventPoll::~LLEventPoll()
- {
- mImpl->stop();
- // Note: the LLEventPollImpl instance will get deleted on coroutine exit,
- // since the coroutine keeps an LLPointer to it on its own stack. HB
- mImpl = NULL;
- }
- void LLEventPoll::setRegionName(const std::string& region_name)
- {
- if (mImpl.notNull())
- {
- mImpl->setRegionName(region_name);
- }
- }
- bool LLEventPoll::isPollInFlight() const
- {
- return mImpl.notNull() && mImpl->isPollInFlight();
- }
- F32 LLEventPoll::getPollAge() const
- {
- return mImpl.notNull() ? mImpl->getPollAge() : -1.f;
- }
- //static
- F32 LLEventPoll::getMargin()
- {
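- // The debug setting is expressed in milliseconds; clamp it to a sane
- // [200, 2000] ms range and convert it to seconds.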
- static LLCachedControl<U32> margin(gSavedSettings,
- "EventPollAgeWindowMargin");
- return llclamp((F32)margin, 200.f, 2000.f) * 0.001f;
- }
- //static
- void LLEventPoll::dispatchMessages()
- {
- // Note: coroutines calling handleMessage() all belong to the main thread,
- // so we do not need a mutex before touching sReplies; should this ever
- // change, a mutex lock would be needed here. HB
- for (U32 i = 0, count = sReplies.size(); i < count; ++i)
- {
- LLEventPollReplies& reply = sReplies[i];
- LL_DEBUGS("EventPoll") << reply.mPollName << "Processing message: "
- << reply.mMessageName << LL_ENDL;
- LLMessageSystem::dispatch(reply.mMessageName, reply.mMessage);
- }
- sReplies.clear();
- }