From dd6178ca1f0ee24f52ac935c1c74405c269ed3d3 Mon Sep 17 00:00:00 2001 From: gered Date: Sun, 29 May 2022 12:54:02 -0400 Subject: [PATCH] improve wav file loading. handle buggy/naive cases better --- libretrogd/src/audio/wav.rs | 58 +++++++++++++++++----- libretrogd/test-assets/22khz_8bit_1ch.wav | Bin 11970 -> 11292 bytes libretrogd/test-assets/44khz_8bit_1ch.wav | Bin 0 -> 22540 bytes 3 files changed, 46 insertions(+), 12 deletions(-) create mode 100644 libretrogd/test-assets/44khz_8bit_1ch.wav diff --git a/libretrogd/src/audio/wav.rs b/libretrogd/src/audio/wav.rs index b096a52..2185151 100644 --- a/libretrogd/src/audio/wav.rs +++ b/libretrogd/src/audio/wav.rs @@ -8,6 +8,7 @@ use sdl2::audio::AudioFormat; use thiserror::Error; use crate::audio::{AudioBuffer, AudioSpec}; +use crate::utils::io::StreamSize; #[derive(Error, Debug)] pub enum WavError { @@ -155,16 +156,29 @@ impl DataChunk { pub fn read( reader: &mut T, chunk_header: &SubChunkHeader, + is_probably_naive_file: bool, ) -> Result { - let mut buffer = vec![0u8; chunk_header.size as usize]; - let bytes_read = reader.read(&mut buffer)?; - // bunch of tools (like sfxr, jsfxr) sometimes generating "data" chunks that are too large. - // probably these tools are just incorrectly hard-coded to always assume 16-bit, because - // every time so far i have seen this, the data chunk size is exactly twice what the actual - // data size is for an 8-bit wav file. - // so, lets chop off the excess, so we don't have a very large amount of zero's at the end - // which would probably result in audio clicking if played as-is! - buffer.truncate(bytes_read); + let mut buffer; + if is_probably_naive_file { + // in this scenario, we have doubts about the chunk size being recorded correctly + // (the tool that created this file may have used a buggy calculation). we assume that + // in this case, this wav file is probably written in a "naive" manner and likely only + // contains "fmt" and "data" chunks with the "data" chunk being at the end of the file. + // if this assumption is correct, then we can just read everything until EOF here as + // the "data" chunk contents and that should be ok (assuming this file isn't corrupt + // anyway). + buffer = Vec::new(); + reader.read_to_end(&mut buffer)?; + } else { + // alternatively, this scenario means we are assuming the file was written out more + // properly and we will assume the chunk size is correct and read that many bytes. + // this is best if there is the possibility that there are more chunks than just "fmt" + // and "data" in this wav file and maybe they are in a different order, etc. + // it is important to note that this seems to be an uncommon case for wav files. most + // wav files seem to be written in a fairly "naive" manner. + buffer = vec![0u8; chunk_header.size as usize]; + reader.read_exact(&mut buffer)?; + } Ok(DataChunk { data: buffer.into_boxed_slice(), }) @@ -179,6 +193,8 @@ impl DataChunk { impl AudioBuffer { pub fn load_wav_bytes(reader: &mut T) -> Result { + let file_size = reader.stream_size()?; + let header = WavHeader::read(reader)?; if header.file_chunk.chunk_id.id != *b"RIFF" { return Err(WavError::BadFile(String::from( @@ -191,6 +207,17 @@ impl AudioBuffer { ))); } + // some tools like sfxr and jsfxr incorrectly calculate data sizes, seemingly using a + // hardcoded 16-bit sample size in their calculations even when the file being created has + // 8-bit sample sizes. this means the chunk size here and the "data" chunk size will be + // larger than they should be for the _actual_ data present in the file. + // of course, if the chunk size here is wrong, it could also mean a corrupt file. but we'll + // proceed regardless, with the assumption that an incorrect size here probably means that + // the file was created using these semi-broken tools and should act accordingly later on. + // if the file is actually corrupt (maybe truncated accidentally or something), then we'll + // hit an EOF earlier than expected somewhere too ... + let is_probably_naive_file = file_size - 8 != header.file_chunk.size as u64; + let mut format: Option = None; let mut data: Option = None; @@ -220,7 +247,7 @@ impl AudioBuffer { ))); } } else if chunk_header.chunk_id.id == *b"data" { - data = Some(DataChunk::read(reader, &chunk_header)?); + data = Some(DataChunk::read(reader, &chunk_header, is_probably_naive_file)?); } // move to the start of the next chunk (possibly skipping over the current chunk if we @@ -259,14 +286,21 @@ impl AudioBuffer { #[cfg(test)] mod tests { + use crate::audio::*; + use super::*; #[test] pub fn load_wav_file() -> Result<(), WavError> { let wav_buffer = AudioBuffer::load_wav_file(Path::new("./test-assets/22khz_8bit_1ch.wav"))?; - assert_eq!(22050, wav_buffer.spec().frequency()); + assert_eq!(AUDIO_FREQUENCY_22KHZ, wav_buffer.spec().frequency()); assert_eq!(1, wav_buffer.spec().channels()); - assert_eq!(8184, wav_buffer.data.len()); + assert_eq!(11248, wav_buffer.data.len()); + + let wav_buffer = AudioBuffer::load_wav_file(Path::new("./test-assets/44khz_8bit_1ch.wav"))?; + assert_eq!(AUDIO_FREQUENCY_44KHZ, wav_buffer.spec().frequency()); + assert_eq!(1, wav_buffer.spec().channels()); + assert_eq!(22496, wav_buffer.data.len()); Ok(()) } } diff --git a/libretrogd/test-assets/22khz_8bit_1ch.wav b/libretrogd/test-assets/22khz_8bit_1ch.wav index 153187e7cec919a76121400be34ef67ef3964187..915ad8c74bc3164fe3361b359dda5c8a0b2b20d3 100644 GIT binary patch literal 11292 zcmeIv*@_+46~^&4q##>zB=vO0n&-Ld%%?TP#sLLloFI`H z2cp->qh$a0mb^=qYMs+9f{m>u1ixBq?>b+tzWVd`-~Z9C4-T&X?Am*u98Z4o+E-uw zQ`A2`zIydnqVi|Yo;`i~bbU?K_3OX?``<);`Q_c;{8rTO@-M#ltEk0|8#iv=y!r88 zL_K-(gnx}Jq%yBp^Fa> zU+h-j{F5lT_`7!xv8dr)S*d!e)zjOd*u!^Z36CE?R)3Xyhl14bo~%?o)#~X;J$);~ z(dzw7irWrvR*y}bEI)#$#gR6SMd>FED6JbvA4-wws@UoK(X_g6W7sA_!w zkV@54B_9sI8LG!`e|YjTZ7WcYrwq2TyMeOIzkV|s6r8P(E^>I2Sa$VkR?}oLngdY zhum=RXhTznOz7G4S|KMiOPV_5oZczDR>**6K(DpQQ<`&{dXqBbgkEctHV6Ado{3T+ zkqCKwY@^jNh0Z3ZL=ke)9G#-GA#6~90@R=pIy;4PhZqnO` zk|Bp^L&(z_1*f6{RHHH44B-rUC_)9Aq76D-AR8qpLnd0G(-kyPh7#nU1v*8WDJoEe zR7f>V{m4 zTJkfeOQ=F3vB|-}LGuBH)&?q2ggn%s5n4}Rg#r|y3Qf>@1RLa`2o=acYc!Z62PG&& z7Fwc#3W+kB3HqUhT&-4WlL9NXNu)N3Rzqrz9F(98S!jWV3uvJXCCEWDG(;-{6(~X; zs?iv&rm#W*3Q&cnXf=j4@=$~dWFi%EhTbfs$t!n+EcqE7vgFG7A#=t%LfX8swMm;h zdXp~W)+S}hd?`w85~)q1<#ae2jY<>}mDO!$UjXV^g9GPf|Ml|2wP0(@*&ks3aydz}8dvwT@E4?984(gB%2YW;M9BhR&IM^4` z;NVh*tdP&a-c7bcE*1+FQeg37lTdClMafu{iyAaW3k&5aLO!a{1T73$q5uV`L`{DvOqdy!5dY`DZE5Thc~uDI=rV7vZl9V zlPaXi!M>2@oKhi?+9VR2oJ=e2*WeG&2tBkR?5X-mZ`aCrb`?LfZ6N zoAfxicSw)RFE=@x%~VK%nF@(iNHjC2q6!qE5EaNoGXs{$M-j@9jb>soHot|__b0tI|*gfdf7y7W$U`bc0sBJA6s>RQgp?^%Nc{~uWGjXKhE7PCq9jjm$kwW4 zNcjz&O?E@dvEJlsLKdP_NK~khC~!nYC_x@7kdq@hj*%SuNR6d@4kj>00m_ks3J=L~ zgyc9wt{l5Cg&_)2idy50=J_G@Sca4-WJr}leuE6D$2vuCNSUG)Qhq~UNVO{Y4KjuN zhTcuSS;(AHi$;;-x^9S40oO&jhf)+GJ@)A3^jhPHUP8}DYMh~fW{l#_m|pI`Iu7V% z^i&EtPH0B-x+!ExIo2U%3Kg>R4SR=drRe*H)|h1K8!i!2t!kf;X`W@7+N40nCcOY< z9wil^7{*ABQ>4Z|N*G5dM7`tin{k4C4yvC?ksL>;cPvxLu};w&Ql{t)DZfF7Z2gS> zhTf2!ac_#wH|UUR7jmpOsZ+F8)i>@9DQ`!qC^bf@PbDZsavY<8o*GN-7zf;mIT@i4 zCCEqVK4Y0e-c}*G+)A+{WZzh(=zhZ`LaG#k&v(IB9rKq-h;q1eg?+EOia(Sm0{`z?zXZP&K;`?CM{W+s`01QIUWE;p`*I(Cu_5u6Q#f&wasgw2e z?%XjZEL{&t?>abSWv%sHGT&w6MlW7?w2#vpgsmWz$2)sAImUj1mDgf3GlDRhYGQ2e z;6MA&Mxg3$m-NbkmG zuU-xCUR_X-ClIXj_kZQ3@v~+vNl3`Z%{6DSzW;A$c6Me;ilDM`kGHq5wUy7~ow#*t z*$XfD^7-C&cFykZOE{c&UVPCZGgEl@@JXrkp^lEu=+UF|ySkdo%a5#DwW|B+)1Yb7 z40Lsa)6>s1HO0HQtcj2JwYPWj^V=bn{`TO(0~U*AK67TdRC>zR*2=)Zz=_K>CWxg{ zsr2^k+f5fQid|e>7P8sNEiIP}4GmqRqBf+br{|ZI7QjbMcXoDuo*-=6+uIv!YipaE zn@+9>kYHDh- zGB%z%W5$df5{V?%+uK`TU*94%HFcjzB=WVkwmt|)9e{0-SS;QPpX&*yg1X7AsjI8I zcJt=VJMe2)8yg$#^z`&LZQ7I#SAAvo?%nxiWo5#QjEv~W$jGp;u#K&)t!?nJZBRA( z+rgRY>gx5u!NF#5NBcWEI*omMW&L(&X905tbzP>Bq3er!) zT{b(Jo143KgUk6~&N#5#-`{^%US8e~sP!OEkj&@vSHf{-V*xiNCT0ul;IH%Z^9wlx zzJW?>_k_xg{`tw1CqE+*9v&Wkk3r)*zziD0VA?-`%}iiA9xTiTF|PpgDx~{Y5Dyh) zy$C)aCWh+@!l1%ZlN}JxAS59vDQOp+_EAR~Nb&;Qw}B=4gozc%e;$J0;D)82CxSgE zkQqi`$4{UI4RIp622}nPal|LChbFjr1!`4C$2<;MGzN)lK~@?g2hnY|@5bf%0%Q-m zMpxH@^9cVvthgKzr(l6T$WjX=78+P0Ne@BF&mqxB4D=6?Na%!(N7X+r1+@ev1iF#F z{;SZ+Kmd22loU;V(Cg6RhK3WOu<$h?BxJrB#(|ar8@6B(p_u&??A5U_OjxX!@ikdk zWZ^40S95{11-cS~uiM8b5>*pOhk<~+Z%g9fDuN^)+m?(S{Q>(o>Q+rnADdvN=*mFr zk60OMGVlxOM`$Y;xetV>In&bk2T7kkB3=KS6g!PH2el|jrj9#Uiu+DgRTht8g1RVR zO{H)a6P5FaTKWUoT08`ZN>7AUL1oSZTgswn5kZjI9h z`pzJ4K+%d84V_r#&i(scGSzekLU|lSOS#21OxzVKTnSA0$cJdw zR;dP-6|LW?)OKC1w&bNs+!(mC>8eqkTm8+dcXn>Di0DHk9cmSFMxQ2&m)d$jcdSAc zk3&xCnx*~?3YDhVGkb1br8Iq`#+ANZzI?f{@|ebsYDR*HQf|Go~DM#g9)EiQ$P+OYrzR%oyvzVJxCY-Ckv^WR8G7 z%SzMxU~ZBtg~3WAWlt9Jf~t`{IQujo|5_g@UOk^-LI0>)+G(&4UgY%0LiId~2a10B z9eA#OA+w8d1LW%w?NfyZ0j)L`_1oh9qaWmPp4*SvNz2(ttHTcsI{r#^0*4EJ`8VJM zzmd^C$$WOi@_?*mX>ek->?~?R@jmjsCcLBn-Jst!4MX1BqmGtqtiBJ|Y6%8kO*{)) z1uFXBfy67}{sDsTY&54;6R12`)730r@nxFUTMDmid)r7v-!Kg}#_Kp)ajoptFkN80 zavhW+^$y{mnzyIYk^8i_cIbVm0DtNB43fBV2Dbj=yBQ@~_k0;H;Fi$G%8(o$`eF3oP@#MiOs=Ht2e9y{42&>}#s zXw{-3TD7QsJAEn4_4^#QM{5gbo%QZKh}rlv&&>ShIcMJa>6>r<=>1yl?oZx(T|I+42<~9zHzDjDvmnP^%|TYNsCl-<8$LWVX8) z!YQkj{qg;0-q=a!d$01uZL5>X=$|r#>+2^^Uh9>fd_8OicHNY60VMo}N13H4((F}~i26oj2tB}-Eccp_7Ps7Zx%WHdNeUhx*Nd8haY^+i_Sgay zxWwv9P=OZqxCzGK6su#89ncZP>U$P4B6(BZ#x%(@8tZ3tu9;*+<93sjkS*?LT<*<7 zws@U2Azd0>c{Zjk_j48UA}K9o`ZFsnWJwnn<#L~#DDdorIs+AEVHYhh0;kvscHx4q zAa*hZBd~>CSl~+)a>8qGKFJAFGfuL{EjONIk2@Nddq*J`1V332NcQB8LbiGIg*M6a zvh~sMtu!o(u!75}mN!b^- zv?o_-X?rsJogTCg2U}?&1sl`^jIcJg>VXQJV~quHT>w=^!dX_9^JxH3)hg$TK%m``JjX_Lg3*(953 zDR7?s&JJ@>fqiV*0V8mXHL;}&x`J3!3ki0lu@62UAw%*!gODNbz1<`wWS2WGO_QAQ zHq9BG#^vtPxhAAXqsJYWn6}*e81gJDZIU@8M}3l@Hc9NP+bSyr$0fB|*qH}>(| zhF}*v>w^lMVXYacz&^IWAxFH;=93)p-ZwQ#n>!jO`3yqZy!Xb-y`zwp%)%6MO7PaS z>|Ds6v=@{`XNYN`3y(@x71%zkHw5g39Nwy;43POR@$D-Aw9BB+apl>aIm@EDNBLVk~*C!s6ZQ=TVM!|unso&Ku-|sXd%I#G@3$A zXzWbrG=}Waxo06`f^R;_m|28etxeJaWx3Z((&3JC(|rwpRNw^bPCy0jZ^(eR*=~{nvsg39F^%0Z&o)l7Pv@GD zK8-$ioMGB>OUS-7nwEPnLr&AuCYk-t4%#F^mfNzhsc)6#3zlSAF&Kb-Z0djt9AnlP zRG@=R`(OaZ*b5di;uUu^$>;}6a!p8=TW-AEF0XA|?j4035!@OPNcN-_(xy}Pq!w~Q zX&(+Y;bJQS1FKh*jZNIrdI}t2y%DHD7n|5%07h63oA{tFi1oCPU=^~O;k7#&&7_0F1CcHugbZ5bH-^0NU8t1r-=${TMWcbm+YK zkPeT21|dU&Z$8P8Kr%@!q{l5co}|Ye$EHbEALI~^#+M-Ekk?)_$%sZfVnD`8x^xa{G){6r z+%H$in9?SRC0pMqDg=imIW87^paLVzi9iK-25@y YUA-ox1lsN~rmMw