lleventpoll.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582
  1. /**
  2. * @file lleventpoll.cpp
  3. * @brief Implementation of the LLEventPoll class.
  4. *
  5. * $LicenseInfo:firstyear=2006&license=viewergpl$
  6. *
  7. * Copyright (c) 2006-2018, Linden Research, Inc.
  8. * Copyright (c) 2019-2023, Henri Beauchamp.
  9. *
  10. * Second Life Viewer Source Code
  11. * The source code in this file ("Source Code") is provided by Linden Lab
  12. * to you under the terms of the GNU General Public License, version 2.0
  13. * ("GPL"), unless you have obtained a separate licensing agreement
  14. * ("Other License"), formally executed by you and Linden Lab. Terms of
  15. * the GPL can be found in doc/GPL-license.txt in this distribution, or
  16. * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
  17. *
  18. * There are special exceptions to the terms and conditions of the GPL as
  19. * it is applied to this Source Code. View the full text of the exception
  20. * in the file doc/FLOSS-exception.txt in this software distribution, or
  21. * online at
  22. * http://secondlifegrid.net/programs/open_source/licensing/flossexception
  23. *
  24. * By copying, modifying or distributing this software, you acknowledge
  25. * that you have read and understood your obligations described above,
  26. * and agree to abide by those obligations.
  27. *
  28. * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
  29. * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
  30. * COMPLETENESS OR PERFORMANCE.
  31. * $/LicenseInfo$
  32. */
  33. #include "llviewerprecompiledheaders.h"
  34. #include "lleventpoll.h"
  35. #include "llcorehttputil.h"
  36. #include "hbfastmap.h"
  37. #include "llhost.h"
  38. #include "llsdserialize.h"
  39. #include "lltrans.h"
  40. #include "llmessage.h"
  41. #include "llagent.h"
  42. #include "llappviewer.h"
  43. #include "llgridmanager.h" // For gIsInSecondLife
  44. #include "llstatusbar.h"
  45. #include "llviewercontrol.h"
  46. // This struture is used to store even poll replies until we can safely process
  47. // them in the main coroutine of the main thread. HB
  48. struct LLEventPollReplies
  49. {
  50. LL_INLINE LLEventPollReplies(const std::string& poll_name,
  51. const std::string& msg_name,
  52. const LLSD& message)
  53. : mPollName(poll_name),
  54. mMessageName(msg_name),
  55. mMessage(message)
  56. {
  57. }
  58. std::string mPollName;
  59. std::string mMessageName;
  60. LLSD mMessage;
  61. };
  62. static std::vector<LLEventPollReplies> sReplies;
  63. ///////////////////////////////////////////////////////////////////////////////
  64. // LLEventPollImpl class
  65. ///////////////////////////////////////////////////////////////////////////////
  66. // We will wait RETRY_SECONDS + (error_count * RETRY_SECONDS_INC) before
  67. // retrying after an error. This means we attempt to recover relatively quickly
  68. // but back off giving more time to recover until we finally give up after
  69. // MAX_EVENT_POLL_HTTP_ERRORS attempts.
  70. // Half of a normal timeout.
  71. constexpr F32 EVENT_POLL_ERROR_RETRY_SECONDS = 15.f;
  72. constexpr F32 EVENT_POLL_ERROR_RETRY_SECONDS_INC = 5.f;
  73. // 5 minutes, by the above rules.
  74. constexpr S32 MAX_EVENT_POLL_HTTP_ERRORS = 10;
  75. class LLEventPollImpl : public LLRefCount
  76. {
  77. protected:
  78. LOG_CLASS(LLEventPollImpl);
  79. public:
  80. LLEventPollImpl(U64 handle, const LLHost& sender);
  81. void start(const std::string& url);
  82. void stop();
  83. void setRegionName(const std::string& region_name);
  84. LL_INLINE bool isPollInFlight() const
  85. {
  86. return !mRequestTimer.hasExpired() &&
  87. // Note: take into account the frame rate, so that we would not
  88. // end up never being able to TP because the events rate would
  89. // be as high as our frame rate. HB
  90. mRequestTimer.getElapsedTimeF32() >= mMinDelay - gFrameDT;
  91. }
  92. LL_INLINE F32 getPollAge() const
  93. {
  94. return mRequestTimer.getElapsedTimeF32();
  95. }
  96. private:
  97. ~LLEventPollImpl();
  98. void handleMessage(const LLSD& content);
  99. static void eventPollCoro(std::string url,
  100. LLPointer<LLEventPollImpl> impl);
  101. private:
  102. LLCore::HttpRequest::policy_t mHttpPolicy;
  103. LLCore::HttpOptions::ptr_t mHttpOptions;
  104. LLCore::HttpHeaders::ptr_t mHttpHeaders;
  105. LLCoreHttpUtil::HttpCoroutineAdapter::wptr_t mAdapter;
  106. U64 mHandle;
  107. U32 mPollId;
  108. U32 mRequestTimeout;
  109. // This is the delay needed between the launch of a request and the moment
  110. // it can reliably receive server messages; messages arriving within this
  111. // delay could potentially get lost. HB
  112. F32 mMinDelay;
  113. std::string mSenderIP;
  114. std::string mPollURL;
  115. std::string mPollName;
  116. LLTimer mRequestTimer;
  117. bool mDone;
  118. static fast_hmap<U64, LLSD> sLastAck;
  119. static U32 sNextID;
  120. };
  121. fast_hmap<U64, LLSD> LLEventPollImpl::sLastAck;
  122. U32 LLEventPollImpl::sNextID = 1;
  123. LLEventPollImpl::LLEventPollImpl(U64 handle, const LLHost& sender)
  124. : mDone(false),
  125. mMinDelay(LLEventPoll::getMargin()),
  126. mPollId(sNextID++),
  127. mHandle(handle),
  128. mSenderIP(sender.getIPandPort()),
  129. // NOTE: by using these instead of omitting the corresponding
  130. // postAndSuspend() parameters, we avoid seeing such classes constructed
  131. // and destroyed at each loop... HB
  132. mHttpOptions(new LLCore::HttpOptions),
  133. mHttpHeaders(new LLCore::HttpHeaders)
  134. {
  135. LLAppCoreHttp& app_core_http = gAppViewerp->getAppCoreHttp();
  136. mHttpPolicy = app_core_http.getPolicy(LLAppCoreHttp::AP_LONG_POLL);
  137. // The region name is unknown when the event poll instance is created: it
  138. // is filled up later via calls to LLEventPoll::setRegionName() done by
  139. // LLViewerRegion. HB
  140. mPollName = llformat("Event poll <%d> - Sender IP: %s - ", mPollId,
  141. mSenderIP.c_str());
  142. llinfos << mPollName << "Initialized." << llendl;
  143. #if LL_WINDOWS
  144. static const bool under_wine = gAppViewerp->isRunningUnderWine();
  145. // When running under Wine, touching the retries and timeouts causes HTTP
  146. // failures (another Wine bug, obviously), so do not do it then... HB
  147. if (under_wine)
  148. {
  149. llwarns_once << "Running under Wine: cannot set event polls retries and timeout."
  150. << llendl;
  151. return;
  152. }
  153. #endif
  154. // Do not retry requests at libcurl level: we want to see the requests
  155. // timing out here, when they do.
  156. mHttpOptions->setRetries(0);
  157. // In SL, we prefer to timeout viewer-side (in libcurl) before the server
  158. // would send us a bogus HTTP error (502 error report HTML page disguised
  159. // with a 499 or 500 error code in the header) on its own timeout (set to
  160. // 30s in SL servers). For OpenSim, we let the server time out on us by
  161. // default (a 502 error will be then received). The user may however decide
  162. // to change the default timeout via the corresponding debug setting. HB
  163. static const char* sl = "EventPollTimeoutForSL";
  164. static const char* os = "EventPollTimeoutForOS";
  165. mRequestTimeout = llclamp(gSavedSettings.getU32(gIsInSecondLife ? sl : os),
  166. 15, 180);
  167. mHttpOptions->setTimeout(mRequestTimeout);
  168. mHttpOptions->setTransferTimeout(mRequestTimeout);
  169. }
  170. LLEventPollImpl::~LLEventPollImpl()
  171. {
  172. mHttpOptions.reset();
  173. mHttpHeaders.reset();
  174. LL_DEBUGS("EventPoll") << mPollName << "Destroyed." << LL_ENDL;
  175. }
  176. void LLEventPollImpl::setRegionName(const std::string& region_name)
  177. {
  178. if (mPollName.find(region_name) == std::string::npos) // Do not spam.
  179. {
  180. llinfos << mPollName << "Got region name: " << region_name << llendl;
  181. mPollName = llformat("Event poll <%d> - Region: %s - ", mPollId,
  182. region_name.c_str());
  183. }
  184. }
  185. void LLEventPollImpl::start(const std::string& url)
  186. {
  187. mPollURL = url;
  188. if (url.empty())
  189. {
  190. return;
  191. }
  192. llinfos << "Starting event poll <" << mPollId << "> - Sender IP: "
  193. << mSenderIP << " - URL: " << mPollURL << llendl;
  194. std::string coroname =
  195. gCoros.launch("LLEventPollImpl::eventPollCoro",
  196. boost::bind(&LLEventPollImpl::eventPollCoro, url, this));
  197. LL_DEBUGS("EventPoll") << mPollName << "Coroutine name: " << coroname
  198. << LL_ENDL;
  199. }
  200. void LLEventPollImpl::stop()
  201. {
  202. mDone = true;
  203. LLCoreHttpUtil::HttpCoroutineAdapter::ptr_t adapterp = mAdapter.lock();
  204. if (adapterp)
  205. {
  206. llinfos << mPollName << "Cancelling..." << llendl;
  207. // Cancel the yielding operation if any.
  208. adapterp->cancelSuspendedOperation();
  209. }
  210. else
  211. {
  212. LL_DEBUGS("EventPoll") << mPollName
  213. << "Already stopped, no action taken."
  214. << LL_ENDL;
  215. }
  216. }
  217. void LLEventPollImpl::handleMessage(const LLSD& content)
  218. {
  219. std::string msg_name = content["message"];
  220. LLSD message;
  221. message["sender"] = mSenderIP;
  222. if (content.has("body"))
  223. {
  224. message["body"] = content["body"];
  225. LL_DEBUGS("EventPoll") << mPollName << "Queuing message: " << msg_name
  226. << LL_ENDL;
  227. }
  228. else
  229. {
  230. llwarns << mPollName << "Message '" << msg_name << "' without a body."
  231. << llendl;
  232. }
  233. // Note: coroutines calling handleMessage() all belong to the main thread,
  234. // so we do not need a mutex before touching sReplies; should this ever
  235. // change, a mutex lock would be needed here. HB
  236. sReplies.emplace_back(mPollName, msg_name, message);
  237. }
  // Body of the polling coroutine launched by LLEventPollImpl::start().
  // It loops, posting an {"ack", "done"} request to 'url' and suspending
  // until the reply arrives, then:
  //  - on success, records the new "id" as the next "ack" value (also kept in
  //    sLastAck for this handle) and queues each event carrying a "message"
  //    key via handleMessage();
  //  - on viewer-side timeout, on a 502 error (or, in SL, on the errors
  //    treated likewise below), simply posts a new request;
  //  - on other HTTP errors, waits and retries, up to
  //    MAX_EVENT_POLL_HTTP_ERRORS times (twice as many for the agent region),
  //    then gives up (forcing a disconnect for the agent region).
  // The loop ends when mDone is set, when gDisconnected is true, on
  // cancellation, on a 404 error for a non-agent region, or on a non-HTTP
  // error reported by the libraries.
  //
  // Parameters:
  //  - url: the URL the requests are posted to.
  //  - impl: the instance this coroutine works for.
  238. //static
  239. void LLEventPollImpl::eventPollCoro(std::string url,
  240. LLPointer<LLEventPollImpl> impl)
  241. {
  242. // Hold a LLPointer of our impl on the coroutine stack, so to avoid the
  243. // impl destruction before the exit of the coroutine. HB
  244. LLPointer<LLEventPollImpl> self = impl;
  245. LLCoreHttpUtil::HttpCoroutineAdapter::ptr_t
  246. adapter(new LLCoreHttpUtil::HttpCoroutineAdapter("EventPoller",
  247. self->mHttpPolicy));
       // Let stop() reach the adapter (mAdapter only holds a weak reference).
  248. self->mAdapter = adapter;
  249. LL_DEBUGS("EventPoll") << self->mPollName << "Entering coroutine."
  250. << LL_ENDL;
  251. // This delay determines a window for TP requests to be sent to the server:
  252. // we avoid sending one when the current poll request is about to expire,
  253. // so to avoid a race condition between servers (sim server, Apache server)
  254. // and viewer, where the TeleportFinish message could get lost during the
  255. // HTTP requests tear-down and restart. HB
  256. const F32 expiry = F32(self->mRequestTimeout) - LLEventPoll::getMargin();
  257. LLSD acknowledge;
  258. // Get the last "ack" we used in previous LLEventPollImpl instances for
  259. // this region, if any. HB
  260. fast_hmap<U64, LLSD>::const_iterator it = sLastAck.find(self->mHandle);
  261. if (it != sLastAck.end())
  262. {
  263. acknowledge = it->second;
  264. }
  265. // Continually poll for a server update until we have been terminated
       // Number of consecutive errors; reset on each success or timeout.
  266. S32 error_count = 0;
  267. while (!self->mDone && !gDisconnected)
  268. {
  269. LLSD request;
  270. request["ack"] = acknowledge;
  271. request["done"] = false;
  272. LL_DEBUGS("EventPoll") << self->mPollName << "Posting and yielding."
  273. << LL_ENDL;
       // (Re)start the timer used by isPollInFlight() and getPollAge().
  274. self->mRequestTimer.reset();
  275. self->mRequestTimer.setTimerExpirySec(expiry);
  276. LLSD result = adapter->postAndSuspend(url, request, self->mHttpOptions,
  277. self->mHttpHeaders);
  278. // Note: resetting the timer flags it as "expired", which we want to
  279. // ensure so that isPollInFlight() returns false at this point. HB
  280. F32 request_time = self->mRequestTimer.getElapsedTimeAndResetF32();
  281. // If this request is faster than our preset "min delay for an
  282. // established connection", then the latter is obviously too large, and
  283. // needs to be reduced. HB
  284. if (request_time < self->mMinDelay)
  285. {
  286. self->mMinDelay = request_time;
  287. LL_DEBUGS("EventPoll") << self->mPollName
  288. << "Minimum delay for established connection reduced to: "
  289. << request_time << LL_ENDL;
  290. }
  291. if (gDisconnected)
  292. {
  293. llinfos << self->mPollName
  294. << "Viewer disconnected. Dropping stale event message."
  295. << llendl;
  296. break;
  297. }
       // Note: evaluated after each reply, i.e. with the current agent region.
  298. bool is_agent_region = gAgent.getRegionHandle() == self->mHandle;
  299. LLCore::HttpStatus status =
  300. LLCoreHttpUtil::HttpCoroutineAdapter::getStatusFromLLSD(result);
  301. if (!status)
  302. {
  303. if (status == gStatusTimeout)
  304. {
  305. // A standard timeout response: we get this when there are no
  306. // events.
  307. LL_DEBUGS("EventPoll") << self->mPollName
  308. << "Request timed out viewer-side after: "
  309. << request_time << "s." << LL_ENDL;
  310. error_count = 0;
  311. continue;
  312. }
  313. // Log details when debugging for all other types of errors. HB
  314. LL_DEBUGS("EventPoll") << self->mPollName
  315. << "Error received after: "
  316. << request_time << "s."
  317. << " - Error " << status.toTerseString()
  318. << ": " << status.getMessage();
  319. const LLSD& http_results =
  320. result[LLCoreHttpUtil::HttpCoroutineAdapter::HTTP_RESULTS];
  321. if (http_results.has("error_body"))
  322. {
  323. std::string body = http_results["error_body"].asString();
  324. LL_CONT << " - Returned body:\n" << body;
  325. }
  326. LL_CONT << LL_ENDL;
  327. // When the server times out (because there was no event to
  328. // report), error 502 is seen on OpenSim grids, and should be seen
  329. // in SL, but are somehow "mutated" (their header is changed, but
  330. // not their "502 error" body) into 499 or 500 errors. Treat as
  331. // timeout and restart. HB
  332. if (status == gStatusBadGateway ||
  333. (gIsInSecondLife &&
  334. (status == gStatusInternalError ||
  335. status == gStatusServerInternalError)))
  336. {
  337. LL_DEBUGS("EventPoll") << "Error ignored and treated as server-side timeout."
  338. << LL_ENDL;
  339. error_count = 0;
  340. continue;
  341. }
  342. if (status == gStatusCancelled)
  343. {
  344. // Event polling for this server has been cancelled.
  345. llinfos << self->mPollName << "Cancelled." << llendl;
  346. break;
  347. }
  348. if (status == gStatusNotFound)
  349. {
  350. // Do not give up on 404 if this is the agent region ! HB
       // (in that case, we fall through to the retry logic below).
  351. if (!is_agent_region)
  352. {
  353. // In some cases the server gets ahead of the viewer and
  354. // will return a 404 error (not found) before the cancel
  355. // event comes back in the queue.
  356. llinfos << self->mPollName << "Cancelled on 404."
  357. << llendl;
  358. break;
  359. }
  360. }
  361. else if (!status.isHttpStatus())
  362. {
  363. // Some libcurl error (other than gStatusTimeout) or LLCore
  364. // error (other than gStatusCancelled) was returned. This is
  365. // unlikely to be recoverable...
  366. llwarns << self->mPollName
  367. << "Critical error returned from libraries. Cancelling coroutine."
  368. << llendl;
  369. break;
  370. }
  371. S32 max_retries = MAX_EVENT_POLL_HTTP_ERRORS;
  372. if (is_agent_region)
  373. {
  374. // Increase the number of allowed retries for the agent region:
  375. // there may be a temporary network issue, and we do not want
  376. // the viewer to give up too soon on the agent's region, since
  377. // it would cause a disconnection from the grid (see below). HB
  378. max_retries *= 2;
  379. llwarns << self->mPollName
  380. << "Agent's region poll request error: "
  381. << status.toTerseString() << ": "
  382. << status.getMessage() << llendl;
  383. if (gStatusBarp)
  384. {
  385. gStatusBarp->incFailedEventPolls();
  386. }
  387. }
  388. if (error_count < max_retries)
  389. {
  390. // An unanticipated error has been received from our poll
  391. // request. Calculate a timeout and wait for it to expire
  392. // (sleep) before trying again. The sleep time is increased by
  393. // EVENT_POLL_ERROR_RETRY_SECONDS_INC seconds for each
  394. // consecutive error until MAX_EVENT_POLL_HTTP_ERRORS is
  395. // reached.
       // Note: the wait is computed from the error count *before* the latter
       // gets incremented (in the log line below).
  396. F32 wait = EVENT_POLL_ERROR_RETRY_SECONDS +
  397. error_count * EVENT_POLL_ERROR_RETRY_SECONDS_INC;
  398. llwarns << self->mPollName << "Retrying in " << wait
  399. << " seconds; error count is now " << ++error_count
  400. << llendl;
  401. llcoro::suspendUntilTimeout(wait);
  402. LL_DEBUGS("EventPoll") << self->mPollName
  403. << "About to retry request." << LL_ENDL;
  404. continue;
  405. }
  406. // At this point we have given up and the viewer will not receive
  407. // HTTP messages from the simulator. IMs, teleports, about land,
  408. // selecting land, region crossing and more will all fail. They are
  409. // essentially disconnected from the region even though some things
  410. // may still work. Since things would not get better until they
  411. // relog we force a disconnect now.
  412. if (is_agent_region)
  413. {
  414. llwarns << self->mPollName
  415. << "Forcing disconnect due to stalled agent region event poll."
  416. << llendl;
  417. gAppViewerp->forceDisconnect(LLTrans::getString("AgentLostConnection"));
  418. }
  419. else
  420. {
  421. llwarns << self->mPollName
  422. << "Stalled region event poll. Giving up." << llendl;
  423. }
  424. self->mDone = true;
  425. break;
  426. }
  427. else if (is_agent_region && gStatusBarp)
  428. {
  429. gStatusBarp->resetFailedEventPolls();
  430. }
       // Successful request: clear the consecutive errors count.
  431. error_count = 0;
  432. if (!result.isMap() || !result.has("events") ||
  433. !result["events"].isArray() || !result.has("id"))
  434. {
  435. llwarns << self->mPollName
  436. << "Received reply without event or 'id' key: "
  437. << LLSDXMLStreamer(result) << llendl;
  438. continue;
  439. }
       // The received "id" becomes the "ack" of our next request; it is also
       // remembered in sLastAck for this handle.
  440. acknowledge = result["id"];
  441. if (acknowledge.isUndefined())
  442. {
  443. LL_DEBUGS("EventPoll") << self->mPollName
  444. << "Got reply with undefined 'id' key."
  445. << LL_ENDL;
  446. sLastAck.erase(self->mHandle);
  447. }
  448. else
  449. {
  450. sLastAck[self->mHandle] = acknowledge;
  451. }
  452. const LLSD& events = result["events"];
       // NOTE(review): this streams 'acknowledge' (the "id"), not the events
       // themselves, after the "Got N event(s):" text - confirm it is intended.
  453. LL_DEBUGS("EventPoll") << self->mPollName << "Got "
  454. << events.size() << " event(s):\n"
  455. << LLSDXMLStreamer(acknowledge) << LL_ENDL;
       // Queue each event carrying a "message" key; others are skipped.
  456. for (LLSD::array_const_iterator it = events.beginArray(),
  457. end = events.endArray();
  458. it != end; ++it)
  459. {
  460. if (it->has("message"))
  461. {
  462. self->handleMessage(*it);
  463. }
  464. }
  465. }
  466. LL_DEBUGS("EventPoll") << self->mPollName << "Leaving coroutine."
  467. << LL_ENDL;
  468. }
  469. ///////////////////////////////////////////////////////////////////////////////
  470. // LLEventPoll class proper
  471. ///////////////////////////////////////////////////////////////////////////////
  472. LLEventPoll::LLEventPoll(U64 handle, const LLHost& sender,
  473. const std::string& poll_url)
  474. : mImpl(new LLEventPollImpl(handle, sender))
  475. {
  476. mImpl->start(poll_url);
  477. }
  478. LLEventPoll::~LLEventPoll()
  479. {
  480. mImpl->stop();
  481. // Note: LLEventPollImpl instance will get deleted on coroutine exit since
  482. // the coroutine keeps a LLPointer to its instance on its own stack. HB
  483. mImpl = NULL;
  484. }
  485. void LLEventPoll::setRegionName(const std::string& region_name)
  486. {
  487. if (mImpl.notNull())
  488. {
  489. mImpl->setRegionName(region_name);
  490. }
  491. }
  492. bool LLEventPoll::isPollInFlight() const
  493. {
  494. return mImpl.notNull() && mImpl->isPollInFlight();
  495. }
  496. F32 LLEventPoll::getPollAge() const
  497. {
  498. return mImpl.notNull() ? mImpl->getPollAge() : -1.f;
  499. }
  500. //static
  501. F32 LLEventPoll::getMargin()
  502. {
  503. static LLCachedControl<U32> margin(gSavedSettings,
  504. "EventPollAgeWindowMargin");
  505. return llclamp((F32)margin, 200.f, 2000.f) * 0.001f;
  506. }
  507. //static
  508. void LLEventPoll::dispatchMessages()
  509. {
  510. // Note: coroutines calling handleMessage() all belong to the main thread,
  511. // so we do not need a mutex before touching sReplies; should this ever
  512. // change, a mutex lock would be needed here. HB
  513. for (U32 i = 0, count = sReplies.size(); i < count; ++i)
  514. {
  515. LLEventPollReplies& reply = sReplies[i];
  516. LL_DEBUGS("EventPoll") << reply.mPollName << "Processing message: "
  517. << reply.mMessageName << LL_ENDL;
  518. LLMessageSystem::dispatch(reply.mMessageName, reply.mMessage);
  519. }
  520. sReplies.clear();
  521. }