llvorbisencode.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. /**
  2. * @file vorbisencode.cpp
  3. * @brief Vorbis encoding routine for Indra.
  4. *
  5. * $LicenseInfo:firstyear=2000&license=viewergpl$
  6. *
  7. * Copyright (c) 2000-2009, Linden Research, Inc.
  8. *
  9. * Second Life Viewer Source Code
  10. * The source code in this file ("Source Code") is provided by Linden Lab
  11. * to you under the terms of the GNU General Public License, version 2.0
  12. * ("GPL"), unless you have obtained a separate licensing agreement
  13. * ("Other License"), formally executed by you and Linden Lab. Terms of
  14. * the GPL can be found in doc/GPL-license.txt in this distribution, or
  15. * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
  16. *
  17. * There are special exceptions to the terms and conditions of the GPL as
  18. * it is applied to this Source Code. View the full text of the exception
  19. * in the file doc/FLOSS-exception.txt in this software distribution, or
  20. * online at
  21. * http://secondlifegrid.net/programs/open_source/licensing/flossexception
  22. *
  23. * By copying, modifying or distributing this software, you acknowledge
  24. * that you have read and understood your obligations described above,
  25. * and agree to abide by those obligations.
  26. *
  27. * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
  28. * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
  29. * COMPLETENESS OR PERFORMANCE.
  30. * $/LicenseInfo$
  31. */
  32. #include "linden_common.h"
  33. #include "vorbis/vorbisenc.h"
  34. #include "llvorbisencode.h"
  35. #include "llmath.h"
  36. #include "llrand.h"
  37. constexpr S64 HEADER_SIZE = 44;
  38. S32 check_for_invalid_wav_formats(const std::string& in_fname,
  39. std::string& error_msg, F32 max_duration)
  40. {
  41. error_msg.clear();
  42. S64 physical_file_size = 0;
  43. LLFile infile(in_fname, "rb", &physical_file_size);
  44. if (!infile)
  45. {
  46. llwarns << "Could not open for read: " << in_fname << llendl;
  47. error_msg = "CannotUploadSoundFile";
  48. return LLVORBISENC_SOURCE_OPEN_ERR;
  49. }
  50. U8 wav_header[HEADER_SIZE];
  51. if (infile.read(wav_header, HEADER_SIZE) != HEADER_SIZE)
  52. {
  53. llwarns << "Could not open read wav header of file: " << in_fname
  54. << llendl;
  55. error_msg = "CannotUploadSoundFile";
  56. return LLVORBISENC_SOURCE_OPEN_ERR;
  57. }
  58. if (strncmp((char*)&(wav_header[0]), "RIFF", 4))
  59. {
  60. error_msg = "SoundFileNotRIFF";
  61. return LLVORBISENC_WAV_FORMAT_ERR;
  62. }
  63. if (strncmp((char*)&(wav_header[8]), "WAVE", 4))
  64. {
  65. error_msg = "SoundFileNotRIFF";
  66. return LLVORBISENC_WAV_FORMAT_ERR;
  67. }
  68. // Parse the chunks
  69. S64 chunk_length = 0;
  70. S64 raw_data_length = 0;
  71. U32 bytes_per_sec = 0;
  72. U32 sample_rate = 0;
  73. U32 bits_per_sample = 0;
  74. U16 num_channels = 0;
  75. bool uncompressed_pcm = false;
  76. // Start at the first chunk (usually fmt but not always)
  77. S64 file_pos = 12;
  78. while (file_pos + 8 < physical_file_size)
  79. {
  80. infile.seek(file_pos);
  81. infile.read(wav_header, HEADER_SIZE);
  82. chunk_length = ((U32)wav_header[7] << 24) +
  83. ((U32)wav_header[6] << 16) +
  84. ((U32)wav_header[5] << 8) + wav_header[4];
  85. if (chunk_length > physical_file_size - file_pos - 4)
  86. {
  87. error_msg = "SoundFileInvalidChunkSize";
  88. return LLVORBISENC_CHUNK_SIZE_ERR;
  89. }
  90. LL_DEBUGS("VorbisEncode") << "Chunk found: '" << wav_header[0]
  91. << wav_header[1] << wav_header[2]
  92. << wav_header[3] << "'" << LL_ENDL;
  93. if (!strncmp((char*)&(wav_header[0]), "fmt ", 4))
  94. {
  95. if (wav_header[8] == 0x01 && wav_header[9] == 0x00)
  96. {
  97. uncompressed_pcm = true;
  98. }
  99. num_channels = ((U16)wav_header[11] << 8) + wav_header[10];
  100. sample_rate = ((U32)wav_header[15] << 24) +
  101. ((U32)wav_header[14] << 16) +
  102. ((U32)wav_header[13] << 8) + wav_header[12];
  103. bits_per_sample = ((U16)wav_header[23] << 8) + wav_header[22];
  104. bytes_per_sec = ((U32)wav_header[19] << 24) +
  105. ((U32) wav_header[18] << 16) +
  106. ((U32) wav_header[17] << 8) + wav_header[16];
  107. }
  108. else if (!strncmp((char*)&(wav_header[0]), "data", 4))
  109. {
  110. raw_data_length = chunk_length;
  111. }
  112. file_pos += chunk_length + 8;
  113. chunk_length = 0;
  114. }
  115. if (!uncompressed_pcm)
  116. {
  117. error_msg = "SoundFileNotPCM";
  118. return LLVORBISENC_PCM_FORMAT_ERR;
  119. }
  120. if (num_channels < 1 || num_channels > LLVORBIS_CLIP_MAX_CHANNELS)
  121. {
  122. error_msg = "SoundFileInvalidChannelCount";
  123. return LLVORBISENC_MULTICHANNEL_ERR;
  124. }
  125. if (sample_rate != LLVORBIS_CLIP_SAMPLE_RATE)
  126. {
  127. error_msg = "SoundFileInvalidSampleRate";
  128. return LLVORBISENC_UNSUPPORTED_SAMPLE_RATE;
  129. }
  130. if (bits_per_sample != 16 && bits_per_sample != 8)
  131. {
  132. error_msg = "SoundFileInvalidWordSize";
  133. return LLVORBISENC_UNSUPPORTED_WORD_SIZE;
  134. }
  135. if (!raw_data_length)
  136. {
  137. error_msg = "SoundFileInvalidHeader";
  138. return LLVORBISENC_CLIP_TOO_LONG;
  139. }
  140. if (max_duration <= LLVORBIS_CLIP_MAX_TIME)
  141. {
  142. max_duration = LLVORBIS_CLIP_MAX_TIME;
  143. }
  144. F32 clip_length = (F32)raw_data_length / (F32)bytes_per_sec;
  145. if (clip_length > max_duration)
  146. {
  147. error_msg = "SoundFileInvalidTooLong";
  148. return LLVORBISENC_CLIP_TOO_LONG;
  149. }
  150. return LLVORBISENC_NOERR;
  151. }
  152. #define READ_BUFFER 1024
  153. S32 encode_vorbis_file(const std::string& in_fname,
  154. const std::string& out_fname, F32 max_duration)
  155. {
  156. S32 format_error = 0;
  157. std::string error_msg;
  158. if ((format_error = check_for_invalid_wav_formats(in_fname, error_msg,
  159. max_duration)))
  160. {
  161. llwarns << error_msg << ": " << in_fname << llendl;
  162. return format_error;
  163. }
  164. LLFile infile(in_fname, "rb");
  165. if (!infile)
  166. {
  167. llwarns << "Could not open sound file for reading and upload: "
  168. << in_fname << llendl;
  169. return LLVORBISENC_SOURCE_OPEN_ERR;
  170. }
  171. LLFile outfile(out_fname, "w+b");
  172. if (!outfile)
  173. {
  174. llwarns << "Could not open temporary ogg file for writing: "
  175. << in_fname << llendl;
  176. return LLVORBISENC_DEST_OPEN_ERR;
  177. }
  178. // Out of the data segment, not the stack
  179. U8 readbuffer[READ_BUFFER * 4 + HEADER_SIZE];
  180. // Take physcal pages, weld into a logical stream of packets
  181. ogg_stream_state os;
  182. // One Ogg bitstream page. Vorbis packets are inside
  183. ogg_page og;
  184. // One raw packet of data for decode
  185. ogg_packet op;
  186. // Structure storing all the static vorbis bitstream settings
  187. vorbis_info vi;
  188. // Structure storing all the user comments
  189. vorbis_comment vc;
  190. // Central working state for the packet->PCM decoder
  191. vorbis_dsp_state vd;
  192. // Local working space for packet->PCM decode
  193. vorbis_block vb;
  194. S32 eos = 0;
  195. S32 result;
  196. U16 num_channels = 0;
  197. U32 sample_rate = 0;
  198. U32 bits_per_sample = 0;
  199. U8 wav_header[HEADER_SIZE];
  200. S64 data_left = 0;
  201. // Parse the chunks
  202. S64 chunk_length = 0;
  203. // Start at the first chunk (usually fmt but not always)
  204. S64 file_pos = 12;
  205. while (!infile.eof() && infile.seek(file_pos) == file_pos &&
  206. infile.read(wav_header, HEADER_SIZE) == HEADER_SIZE)
  207. {
  208. chunk_length = ((U32)wav_header[7] << 24) +
  209. ((U32)wav_header[6] << 16) +
  210. ((U32)wav_header[5] << 8) + wav_header[4];
  211. LL_DEBUGS("VorbisEncode") << "Chunk found: '" << wav_header[0]
  212. << wav_header[1] << wav_header[2]
  213. << wav_header[3] << "'" << LL_ENDL;
  214. if (!strncmp((char*)&(wav_header[0]), "fmt ", 4))
  215. {
  216. num_channels = ((U16) wav_header[11] << 8) + wav_header[10];
  217. sample_rate = ((U32) wav_header[15] << 24) +
  218. ((U32) wav_header[14] << 16) +
  219. ((U32) wav_header[13] << 8) + wav_header[12];
  220. bits_per_sample = ((U16) wav_header[23] << 8) + wav_header[22];
  221. }
  222. else if (!strncmp((char*)&(wav_header[0]), "data", 4))
  223. {
  224. infile.seek(file_pos + 8);
  225. // Leave the file pointer at the beginning of the data chunk data
  226. data_left = chunk_length;
  227. break;
  228. }
  229. file_pos += chunk_length + 8;
  230. chunk_length = 0;
  231. }
  232. //********** Encode setup ************//
  233. // Choose an encoding mode:
  234. // (mode 0: 44kHz stereo uncoupled, roughly 128kbps VBR)
  235. vorbis_info_init(&vi);
  236. // Always encode to mono
  237. #if 1
  238. // SL-52913 & SL-53779 determined this quality level to be our 'good
  239. // enough' general-purpose quality level with a nice low bitrate.
  240. // Equivalent to oggenc -q0.5
  241. F32 quality = 0.05f;
  242. #else
  243. F32 quality = (bitrate == 128000 ? 0.4f : 0.1f);
  244. #endif
  245. if (vorbis_encode_init_vbr(&vi, /*num_channels*/ 1, sample_rate, quality))
  246. {
  247. llwarns << "Unable to initialize vorbis CODEC at quality "
  248. << quality << llendl;
  249. return LLVORBISENC_DEST_OPEN_ERR;
  250. }
  251. // Add a comment
  252. vorbis_comment_init(&vc);
  253. #if 0
  254. vorbis_comment_add(&vc, "Linden");
  255. #endif
  256. // Set up the analysis state and auxiliary encoding storage
  257. vorbis_analysis_init(&vd, &vi);
  258. vorbis_block_init(&vd, &vb);
  259. // Set up our packet->stream encoder. Pick a random serial number; that way
  260. // we can more likely build chained streams just by concatenation.
  261. ogg_stream_init(&os, ll_rand());
  262. // Vorbis streams begin with three headers; the initial header (with most
  263. // of the CODEC setup parameters) which is mandated by the Ogg bitstream
  264. // spec. The second header holds any comment fields. The third header holds
  265. // the bitstream codebook. We merely need to make the headers, then pass
  266. // them to libvorbis one at a time; libvorbis handles the additional Ogg
  267. // bitstream constraints.
  268. {
  269. ogg_packet header;
  270. ogg_packet header_comm;
  271. ogg_packet header_code;
  272. vorbis_analysis_headerout(&vd, &vc, &header, &header_comm,
  273. &header_code);
  274. // automatically placed in its own page:
  275. ogg_stream_packetin(&os, &header);
  276. ogg_stream_packetin(&os, &header_comm);
  277. ogg_stream_packetin(&os, &header_code);
  278. // We do not have to write out here, but doing so makes streaming much
  279. // easier, so we do, flushing ALL pages. This ensures the actual audio
  280. // data will start on a new page
  281. while (!eos)
  282. {
  283. S32 result = ogg_stream_flush(&os, &og);
  284. if (result == 0) break;
  285. outfile.write((U8*)og.header, og.header_len);
  286. outfile.write((U8*)og.body, og.body_len);
  287. }
  288. }
  289. while (!eos)
  290. {
  291. S64 bytes_per_sample = bits_per_sample / 8;
  292. // stereo hardwired here
  293. S64 n = llclamp((S64)(READ_BUFFER * num_channels * bytes_per_sample),
  294. S64(0), data_left);
  295. S64 bytes = infile.read(readbuffer, n);
  296. if (bytes != n)
  297. {
  298. // End of file. This can be done implicitly in the mainline, but
  299. // it is easier to see here in non-clever fashion. Tell the library
  300. // we are at end of stream so that it can handle the last frame and
  301. // mark end of stream in the output properly
  302. vorbis_analysis_wrote(&vd, 0);
  303. }
  304. else
  305. {
  306. data_left -= bytes;
  307. // Expose the buffer to submit data
  308. F32** buffer = vorbis_analysis_buffer(&vd, READ_BUFFER);
  309. S64 i = 0;
  310. S64 samples = bytes / (num_channels * bytes_per_sample);
  311. S32 temp;
  312. if (num_channels == 2)
  313. {
  314. if (bytes_per_sample == 2)
  315. {
  316. // Uninterleave samples
  317. for (i = 0; i < samples; ++i)
  318. {
  319. temp = ((char*)readbuffer)[i * 4 + 1];
  320. temp += ((char*)readbuffer)[i * 4 + 3];
  321. temp <<= 8;
  322. temp += readbuffer[i * 4];
  323. temp += readbuffer[i * 4 + 2];
  324. buffer[0][i] = F32(temp) * (1.f / 65536.f);
  325. }
  326. }
  327. else
  328. {
  329. // Presume it is 1 byte per which is unsigned (F#@%ing wav
  330. // "standard")
  331. // Uninterleave samples
  332. for (i = 0; i < samples; ++i)
  333. {
  334. temp = readbuffer[i * 2];
  335. temp += readbuffer[i * 2 + 1];
  336. temp -= 256;
  337. buffer[0][i] = F32(temp) * (1.f / 256.f);
  338. }
  339. }
  340. }
  341. else if (num_channels == 1)
  342. {
  343. if (bytes_per_sample == 2)
  344. {
  345. for (i = 0; i < samples; ++i)
  346. {
  347. temp = ((char*)readbuffer)[i * 2 + 1];
  348. temp <<= 8;
  349. temp += readbuffer[i * 2];
  350. buffer[0][i] = F32(temp) * (1.f / 32768.f);
  351. }
  352. }
  353. else
  354. {
  355. // Presume it is 1 byte per which is unsigned (F#@%ing wav
  356. // "standard")
  357. for (i = 0; i < samples; ++i)
  358. {
  359. temp = readbuffer[i];
  360. temp -= 128;
  361. buffer[0][i] = F32(temp) * (1.f / 128.f);
  362. }
  363. }
  364. }
  365. // Tell the library how much we actually submitted
  366. vorbis_analysis_wrote(&vd,i);
  367. }
  368. // Vorbis does some data preanalysis, then divvies up blocks for more
  369. // involved (potentially parallel) processing. Get a single block for
  370. // encoding now.
  371. while (vorbis_analysis_blockout(&vd, &vb) == 1)
  372. {
  373. // Analysis. Do the main analysis, creating a packet.
  374. vorbis_analysis(&vb, NULL);
  375. vorbis_bitrate_addblock(&vb);
  376. while (vorbis_bitrate_flushpacket(&vd, &op))
  377. {
  378. // Weld the packet into the bitstream
  379. ogg_stream_packetin(&os, &op);
  380. // Write out pages (if any)
  381. while (!eos)
  382. {
  383. result = ogg_stream_pageout(&os,&og);
  384. if (result == 0) break;
  385. outfile.write((U8*)og.header, og.header_len);
  386. outfile.write((U8*)og.body, og.body_len);
  387. // This could be set above, but for illustrative purposes,
  388. // I do it here (to show that vorbis does know where the
  389. // stream ends).
  390. if (ogg_page_eos(&og))
  391. {
  392. eos = 1;
  393. }
  394. }
  395. }
  396. }
  397. }
  398. // Clean up and exit. vorbis_info_clear() must be called last. ogg_page and
  399. // ogg_packet structs always point to storage in libvorbis. They are never
  400. // freed or manipulated directly.
  401. ogg_stream_clear(&os);
  402. vorbis_block_clear(&vb);
  403. vorbis_dsp_clear(&vd);
  404. vorbis_comment_clear(&vc);
  405. vorbis_info_clear(&vi);
  406. llinfos << "Vorbis encoding done." << llendl;
  407. return LLVORBISENC_NOERR;
  408. }