This commit is contained in:
Sonic Dreamcaster 2025-02-05 02:47:35 -03:00
commit eaaea790d9
16 changed files with 1285 additions and 53 deletions

View File

@ -14,7 +14,7 @@ jobs:
- name: Update machine
run: sudo apt update
- name: Install dependencies
run: sudo apt-get install gcc g++ git cmake ninja-build lsb-release libsdl2-dev libpng-dev libsdl2-net-dev libzip-dev zipcmp zipmerge ziptool nlohmann-json3-dev libtinyxml2-dev libspdlog-dev libboost-dev libopengl-dev
run: sudo apt-get install gcc g++ git cmake ninja-build lsb-release libsdl2-dev libpng-dev libsdl2-net-dev libzip-dev zipcmp zipmerge ziptool nlohmann-json3-dev libtinyxml2-dev libspdlog-dev libboost-dev libopengl-dev libogg-dev libvorbis-dev
- name: Install latest SDL
run: |
export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"

View File

@ -12,7 +12,7 @@ jobs:
submodules: recursive
- name: Install dependencies
run: brew install sdl2 libpng glew ninja cmake libzip nlohmann-json tinyxml2 spdlog
run: brew install sdl2 libpng glew ninja cmake libzip nlohmann-json tinyxml2 spdlog vorbis-tools
- name: Build
run: |
cmake -H. -Bbuild-cmake -GNinja -DCMAKE_BUILD_TYPE=Release

View File

@ -81,7 +81,7 @@ jobs:
submodules: recursive
- name: Install dependencies
run: brew install sdl2 libpng glew ninja cmake libzip nlohmann-json tinyxml2 spdlog
run: brew install sdl2 libpng glew ninja cmake libzip nlohmann-json tinyxml2 spdlog vorbis-tools
- name: Build
run: |
cmake -H. -Bbuild-cmake -GNinja -DCMAKE_BUILD_TYPE=Release
@ -115,7 +115,7 @@ jobs:
- name: Update machine
run: sudo apt update
- name: Install dependencies
run: sudo apt-get install gcc g++ git cmake ninja-build lsb-release libsdl2-dev libpng-dev libsdl2-net-dev libzip-dev zipcmp zipmerge ziptool nlohmann-json3-dev libtinyxml2-dev libspdlog-dev libboost-dev libopengl-dev
run: sudo apt-get install gcc g++ git cmake ninja-build lsb-release libsdl2-dev libpng-dev libsdl2-net-dev libzip-dev zipcmp zipmerge ziptool nlohmann-json3-dev libtinyxml2-dev libspdlog-dev libboost-dev libopengl-dev libogg-dev libvorbis-dev
- name: ccache
uses: hendrikmuhs/ccache-action@v1.2.14

View File

@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.16.0 FATAL_ERROR)
# Set the project version and language
project(Starship VERSION 0.1.0 LANGUAGES C CXX ASM)
@ -26,7 +27,7 @@ include(cmake/automate-vcpkg.cmake)
set(VCPKG_TRIPLET x64-windows-static)
set(VCPKG_TARGET_TRIPLET x64-windows-static)
vcpkg_install_packages(zlib bzip2 libzip libpng sdl2 glew glfw3 nlohmann-json tinyxml2 spdlog)
vcpkg_install_packages(zlib bzip2 libzip libpng sdl2 glew glfw3 nlohmann-json tinyxml2 spdlog libogg libvorbis)
@ -188,6 +189,19 @@ if (MSVC)
#=================== SSE2NEON ===================
set(SSE2NEON_DIR ${CMAKE_BINARY_DIR}/_deps/sse2neon)
file(DOWNLOAD "" "${SSE2NEON_DIR}/sse2neon.h")
GIT_TAG da35f9d6c7374a95353fd1df1d394d44ab66cf01
# Libultraship Integration #
@ -224,6 +238,7 @@ include_directories(
add_subdirectory(libultraship ${CMAKE_CURRENT_SOURCE_DIR}/libultraship)
@ -280,8 +295,17 @@ endif()
find_package(Ogg CONFIG REQUIRED)
find_package(Vorbis CONFIG REQUIRED)
elseif(CMAKE_SYSTEM_NAME STREQUAL "NintendoSwitch")
@ -295,8 +319,14 @@ elseif(CMAKE_SYSTEM_NAME STREQUAL "CafeOS")
find_package(Ogg REQUIRED)
find_package(Vorbis REQUIRED)

View File

@ -0,0 +1,31 @@
force: true
- '#include "sys.h"'
- '#include "sf64audio_provisional.h"'
driver: SF64
size: 0x3AFD0
offset: 0xE9950
size: 0x1CB20
offset: 0x1183A0
size: 0x691AF0
offset: 0x134EC0
{ type: NAUDIO:V1:AUDIO_TABLE, format: SAMPLE, offset: 0xC1460, symbol: gSampleBankTableInit }
{ type: NAUDIO:V1:AUDIO_TABLE, format: SEQUENCE, offset: 0xC14A0, symbol: gSeqTableInit }
{ type: NAUDIO:V1:AUDIO_TABLE, format: SOUNDFONT, offset: 0xC18D0, symbol: gSoundFontTableInit }
{ type: ARRAY, count: 283, array_type: u8, offset: 0xC1AF0, symbol: gSeqFontTableInit }

View File

@ -0,0 +1,61 @@
# - Find ogg
# Find the native ogg includes and libraries
# OGG_INCLUDE_DIRS - where to find ogg.h, etc.
# OGG_LIBRARIES - List of libraries when using ogg.
# OGG_FOUND - True if ogg found.
# Already in cache, be silent
endif ()
find_package (PkgConfig QUIET)
pkg_check_modules (PC_OGG QUIET ogg>=1.3.0)
find_path (OGG_INCLUDE_DIR ogg/ogg.h
# MSVC built ogg may be named ogg_static.
# The provided project files name the library with the lib prefix.
find_library (OGG_LIBRARY
# Handle the QUIETLY and REQUIRED arguments and set OGG_FOUND
# to TRUE if all listed variables are TRUE.
include (FindPackageHandleStandardArgs)
find_package_handle_standard_args (Ogg
if(NOT TARGET Ogg::ogg)
add_library(Ogg::ogg UNKNOWN IMPORTED)
set_target_properties(Ogg::ogg PROPERTIES
endif ()
endif ()
mark_as_advanced (OGG_INCLUDE_DIR OGG_LIBRARY)

View File

@ -0,0 +1,197 @@
Finds the native vorbis, vorbisenc and vorbisfile includes and libraries.
Imported Targets
This module provides the following imported targets, if found:
The Vorbis library
The VorbisEnc library
The VorbisFile library
Result Variables
This will define the following variables:
List of include directories when using vorbis.
List of include directories when using vorbisenc.
List of include directories when using vorbisfile.
List of libraries when using vorbis.
List of libraries when using vorbisenc.
List of libraries when using vorbisfile.
True if vorbis and requested components found.
True if vorbis found.
True if vorbisenc found.
True if vorbisfile found.
Cache variables
The following cache variables may also be set:
The directory containing ``vorbis/codec.h``.
The directory containing ``vorbis/vorbisenc.h``.
The directory containing ``vorbis/vorbisfile.h``.
The path to the vorbis library.
The path to the vorbisenc library.
The path to the vorbisfile library.
A user may set ``Vorbis_ROOT`` to a vorbis installation root to tell this module where to look.
if (Vorbis_Vorbis_INCLUDE_DIR)
# Already in cache, be silent
endif ()
set (Vorbis_Vorbis_FIND_QUIETLY TRUE)
set (Vorbis_Enc_FIND_QUIETLY TRUE)
set (Vorbis_File_FIND_QUIETLY TRUE)
find_package (Ogg QUIET)
find_package (PkgConfig QUIET)
pkg_check_modules (PC_Vorbis_Vorbis QUIET vorbis)
pkg_check_modules (PC_Vorbis_Enc QUIET vorbisenc)
pkg_check_modules (PC_Vorbis_File QUIET vorbisfile)
set (Vorbis_VERSION ${PC_Vorbis_Vorbis_VERSION})
find_path (Vorbis_Vorbis_INCLUDE_DIR vorbis/codec.h
find_path (Vorbis_Enc_INCLUDE_DIR vorbis/vorbisenc.h
find_path (Vorbis_File_INCLUDE_DIR vorbis/vorbisfile.h
find_library (Vorbis_Vorbis_LIBRARY
find_library (Vorbis_Enc_LIBRARY
find_library (Vorbis_File_LIBRARY
include (FindPackageHandleStandardArgs)
if (Vorbis_Vorbis_LIBRARY AND Vorbis_Vorbis_INCLUDE_DIR AND Ogg_FOUND)
set (Vorbis_Vorbis_FOUND TRUE)
endif ()
if (Vorbis_Enc_LIBRARY AND Vorbis_Enc_INCLUDE_DIR AND Vorbis_Vorbis_FOUND)
set (Vorbis_Enc_FOUND TRUE)
endif ()
if (Vorbis_Vorbis_FOUND AND Vorbis_File_LIBRARY AND Vorbis_File_INCLUDE_DIR)
set (Vorbis_File_FOUND TRUE)
endif ()
find_package_handle_standard_args (Vorbis
if (Vorbis_Vorbis_FOUND)
if (NOT TARGET Vorbis::vorbis)
add_library (Vorbis::vorbis UNKNOWN IMPORTED)
set_target_properties (Vorbis::vorbis PROPERTIES
endif ()
if (Vorbis_Enc_FOUND)
set (Vorbis_Enc_INCLUDE_DIRS ${Vorbis_Enc_INCLUDE_DIR})
set (Vorbis_Enc_LIBRARIES ${Vorbis_Enc_LIBRARY} ${Vorbis_Enc_LIBRARIES})
if (NOT TARGET Vorbis::vorbisenc)
add_library (Vorbis::vorbisenc UNKNOWN IMPORTED)
set_target_properties (Vorbis::vorbisenc PROPERTIES
endif ()
endif ()
if (Vorbis_File_FOUND)
set (Vorbis_File_INCLUDE_DIRS ${Vorbis_File_INCLUDE_DIR})
set (Vorbis_File_LIBRARIES ${Vorbis_File_LIBRARY} ${Vorbis_File_LIBRARIES})
if (NOT TARGET Vorbis::vorbisfile)
add_library (Vorbis::vorbisfile UNKNOWN IMPORTED)
set_target_properties (Vorbis::vorbisfile PROPERTIES
endif ()
endif ()
endif ()
mark_as_advanced (Vorbis_Vorbis_INCLUDE_DIR Vorbis_Vorbis_LIBRARY)
mark_as_advanced (Vorbis_Enc_INCLUDE_DIR Vorbis_Enc_LIBRARY)
mark_as_advanced (Vorbis_File_INCLUDE_DIR Vorbis_File_LIBRARY)

View File

@ -83,34 +83,34 @@ C:\Program Files\CMake\bin\cmake.exe --build build-cmake --target clean
#### Debian/Ubuntu
# using gcc
apt-get install gcc g++ git cmake ninja-build lsb-release libsdl2-dev libpng-dev libsdl2-net-dev libzip-dev zipcmp zipmerge ziptool nlohmann-json3-dev libtinyxml2-dev libspdlog-dev libboost-dev libopengl-dev
apt-get install gcc g++ git cmake ninja-build lsb-release libsdl2-dev libpng-dev libsdl2-net-dev libzip-dev zipcmp zipmerge ziptool nlohmann-json3-dev libtinyxml2-dev libspdlog-dev libboost-dev libopengl-dev libogg-dev libvorbis-dev
# or using clang
apt-get install clang git cmake ninja-build lsb-release libsdl2-dev libpng-dev libsdl2-net-dev libzip-dev zipcmp zipmerge ziptool nlohmann-json3-dev libtinyxml2-dev libspdlog-dev libboost-dev libopengl-dev
apt-get install clang git cmake ninja-build lsb-release libsdl2-dev libpng-dev libsdl2-net-dev libzip-dev zipcmp zipmerge ziptool nlohmann-json3-dev libtinyxml2-dev libspdlog-dev libboost-dev libopengl-dev libogg-dev libvorbis-dev
#### Arch
# using gcc
pacman -S gcc git cmake ninja lsb-release sdl2 libpng libzip nlohmann-json tinyxml2 spdlog sdl2_net boost
pacman -S gcc git cmake ninja lsb-release sdl2 libpng libzip nlohmann-json tinyxml2 spdlog sdl2_net boost libogg libvorbis
# or using clang
pacman -S clang git cmake ninja lsb-release sdl2 libpng libzip nlohmann-json tinyxml2 spdlog sdl2_net boost
pacman -S clang git cmake ninja lsb-release sdl2 libpng libzip nlohmann-json tinyxml2 spdlog sdl2_net boost libogg libvorbis
#### Fedora
# using gcc
dnf install gcc gcc-c++ git cmake ninja-build lsb_release SDL2-devel libpng-devel libzip-devel libzip-tools nlohmann-json-devel tinyxml2-devel spdlog-devel boost-devel
dnf install gcc gcc-c++ git cmake ninja-build lsb_release SDL2-devel libpng-devel libzip-devel libzip-tools nlohmann-json-devel tinyxml2-devel spdlog-devel boost-devel libogg-devel libvorbis-devel
# or using clang
dnf install clang git cmake ninja-build lsb_release SDL2-devel libpng-devel libzip-devel libzip-tools nlohmann-json-devel tinyxml2-devel spdlog-devel boost-devel
dnf install clang git cmake ninja-build lsb_release SDL2-devel libpng-devel libzip-devel libzip-tools nlohmann-json-devel tinyxml2-devel spdlog-devel boost-devel libogg-devel libvorbis-devel
#### openSUSE
# using gcc
zypper in gcc gcc-c++ git cmake ninja SDL2-devel libpng16-devel libzip-devel libzip-tools nlohmann_json-devel tinyxml2-devel spdlog-devel
zypper in gcc gcc-c++ git cmake ninja SDL2-devel libpng16-devel libzip-devel libzip-tools nlohmann_json-devel tinyxml2-devel spdlog-devel libogg-devel libvorbis-devel
# or using clang
zypper in clang libstdc++-devel git cmake ninja SDL2-devel libpng16-devel libzip-devel libzip-tools nlohmann_json-devel tinyxml2-devel spdlog-devel
zypper in clang libstdc++-devel git cmake ninja SDL2-devel libpng16-devel libzip-devel libzip-tools nlohmann_json-devel tinyxml2-devel spdlog-devel libogg-devel libvorbis-devel
### Build
@ -160,7 +160,7 @@ cmake --build build-cmake --target clean
## macOS
Requires Xcode (or xcode-tools) && `sdl2, libpng, glew, ninja, cmake, nlohmann-json, libzip` (can be installed via homebrew, macports, etc)
Requires Xcode (or xcode-tools) && `sdl2, libpng, glew, ninja, cmake, nlohmann-json, libzip, vorbis-tools` (can be installed via homebrew, macports, etc)
**Important: For maximum performance make sure you have ninja build tools installed!**

View File

@ -1036,24 +1036,13 @@ Acmd* AudioSynth_ProcessNote(s32 noteIndex, NoteSubEu* noteSub, NoteSynthesisSta
goto skip;
case CODEC_S16:
aLoadBuffer(aList++, OS_K0_TO_PHYSICAL(bookSample->sampleAddr + synthState->samplePosInt * 2), DMEM_UNCOMPRESSED_NOTE,
(numSamplesToLoadAdj + SAMPLES_PER_FRAME) * 2);
flags = A_CONTINUE;
skipBytes = 0;
size_t bytesToRead;
numSamplesProcessed += numSamplesToLoadAdj;
numSamplesProcessed = numSamplesToLoadAdj;
dmemUncompressedAddrOffset1 = numSamplesToLoadAdj;
if (((synthState->samplePosInt * 2) + (numSamplesToLoadAdj)*SAMPLE_SIZE) < bookSample->size) {
bytesToRead = (numSamplesToLoadAdj)*SAMPLE_SIZE;
} else {
bytesToRead = bookSample->size - (synthState->samplePosInt * 2);
// @port [Custom audio]
// TLDR samples are loaded async and might be null the first time they are played.
// See note in AudioSampleFactory.cpp
if (sampleAddr != NULL) {
aLoadBuffer(cmd++, sampleAddr + (synthState->samplePosInt * 2), DMEM_UNCOMPRESSED_NOTE,
goto skip;

View File

@ -3,12 +3,68 @@
#include <string.h>
#include <stdio.h>
#include <macros.h>
#include "mixer.h"
#ifndef __clang__
#pragma GCC optimize ("unroll-loops")
#if defined(__SSE2__) || defined(__aarch64__)
#pragma message("Warning: SSE2 support is not available. Code will not compile")
#if defined(__SSE2__)
#include <emmintrin.h>
#elif defined(__aarch64__)
#include "sse2neon.h"
// Two 128-bit integer vectors bundled together so the helpers below can pass
// around 256 bits worth of lanes (e.g. eight 32-bit values) using only
// 128-bit SSE2-style registers.
typedef struct {
__m128i lo, hi;
} m256i;
// Multiplies the eight signed 16-bit lanes of `a` and `b` and returns the
// eight full 32-bit products: products of lanes 0-3 in `.lo`, lanes 4-7 in `.hi`.
static m256i m256i_mul_epi16(__m128i a, __m128i b) {
m256i res;
// Low and high 16-bit halves of every 32-bit product.
res.lo = _mm_mullo_epi16(a, b);
res.hi = _mm_mulhi_epi16(a, b);
m256i ret;
// Interleave the halves to rebuild the complete 32-bit products.
ret.lo = _mm_unpacklo_epi16(res.lo, res.hi);
ret.hi = _mm_unpackhi_epi16(res.lo, res.hi);
return ret;
// Lane-wise 32-bit addition of two m256i values (lo with lo, hi with hi).
static m256i m256i_add_m256i_epi32(m256i a, m256i b) {
m256i res;
res.lo = _mm_add_epi32(a.lo, b.lo);
res.hi = _mm_add_epi32(a.hi, b.hi);
return res;
// Adds the four 32-bit lanes of `b` to both halves of `a`, i.e. the same
// 128-bit addend is applied to `a.lo` and to `a.hi`.
static m256i m256i_add_m128i_epi32(m256i a, __m128i b) {
m256i res;
res.lo = _mm_add_epi32(a.lo, b);
res.hi = _mm_add_epi32(a.hi, b);
return res;
// Arithmetic (sign-preserving) right shift of every 32-bit lane of `a` by `b` bits.
static m256i m256i_srai(m256i a, int b) {
m256i res;
res.lo = _mm_srai_epi32(a.lo, b);
res.hi = _mm_srai_epi32(a.hi, b);
return res;
// Narrows the eight 32-bit lanes of `a` to eight 16-bit lanes with signed
// saturation; `a.lo` becomes result lanes 0-3 and `a.hi` lanes 4-7.
static __m128i m256i_clamp_to_m128i(m256i a) {
return _mm_packs_epi32(a.lo, a.hi);
#define ROUND_UP_64(v) (((v) + 63) & ~63)
#define ROUND_UP_32(v) (((v) + 31) & ~31)
#define ROUND_UP_16(v) (((v) + 15) & ~15)
@ -218,6 +274,8 @@ void aSetLoopImpl(ADPCM_STATE *adpcm_loop_state) {
rspa.adpcm_loop_state = adpcm_loop_state;
void aADPCMdecImpl(uint8_t flags, ADPCM_STATE state) {
uint8_t *in = BUF_U8(;
int16_t *out = BUF_S16(rspa.out);
@ -269,6 +327,133 @@ void aADPCMdecImpl(uint8_t flags, ADPCM_STATE state) {
memcpy(state, out - 16, 16 * sizeof(int16_t));
static uint16_t lower_4bit[] = {
static uint16_t lower_2bit[] = {
void aADPCMdecImpl(uint8_t flags, ADPCM_STATE state) {
uint8_t* in = BUF_U8(;
int16_t* out = BUF_S16(rspa.out);
int nbytes = ROUND_UP_32(rspa.nbytes);
if (flags & A_INIT) {
memset(out, 0, 16 * sizeof(int16_t));
} else if (flags & A_LOOP) {
memcpy(out, rspa.adpcm_loop_state, 16 * sizeof(int16_t));
} else {
memcpy(out, state, 16 * sizeof(int16_t));
out += 16;
__m128i mask_4bit = _mm_loadl_epi64((__m128i*) lower_4bit);
__m128i mask_2bit = _mm_loadl_epi64((__m128i*) lower_2bit);
while (nbytes > 0) {
int shift = *in >> 4; // should be in 0..12 or 0..14
__m128i shift_vec = _mm_set1_epi16(shift);
int table_index = *in++ & 0xf; // should be in 0..7
int16_t(*tbl)[8] = rspa.adpcm_table[table_index];
for (int i = 0; i < 2; i++) {
int16_t ins[8];
int16_t prev1 = out[-1];
int16_t prev2 = out[-2];
__m128i prev1_vec = _mm_set1_epi16(prev1);
__m128i prev2_vec = _mm_set1_epi16(prev2);
__m128i ins_vec;
if (flags & 4) {
ins_vec = _mm_loadu_si16((__m128i*) in);
ins_vec = _mm_unpacklo_epi8(ins_vec, _mm_setzero_si128());
__m128i in_vec_up2bit = _mm_srli_epi16(ins_vec, 6);
__m128i in_vec_uplower2bit = _mm_and_si128(_mm_srli_epi16(ins_vec, 4), mask_2bit);
__m128i in_vec_lowerup2bit = _mm_and_si128(_mm_srli_epi16(ins_vec, 2), mask_2bit);
__m128i in_vec_lower2bit = _mm_and_si128(ins_vec, mask_2bit);
__m128i in_vec_up = _mm_unpacklo_epi16(in_vec_up2bit, in_vec_uplower2bit);
in_vec_up = _mm_shuffle_epi32(in_vec_up, _MM_SHUFFLE(3, 1, 2, 0));
__m128i in_vec_low = _mm_unpacklo_epi16(in_vec_lower2bit, in_vec_lowerup2bit);
in_vec_low = _mm_shuffle_epi32(in_vec_low, _MM_SHUFFLE(3, 1, 2, 0));
ins_vec = _mm_unpacklo_epi32(in_vec_up, in_vec_low);
ins_vec = _mm_slli_epi16(ins_vec, 14);
ins_vec = _mm_srai_epi16(ins_vec, 14);
ins_vec = _mm_slli_epi16(ins_vec, shift);
in += 2;
} else {
ins_vec = _mm_loadu_si32((__m128i*) in);
ins_vec = _mm_unpacklo_epi8(ins_vec, _mm_setzero_si128());
__m128i in_vec_up4bit = _mm_srli_epi16(ins_vec, 4);
__m128i in_vec_lower4bit = _mm_and_si128(ins_vec, mask_4bit);
ins_vec = _mm_unpacklo_epi16(in_vec_up4bit, in_vec_lower4bit);
ins_vec = _mm_slli_epi16(ins_vec, 12);
ins_vec = _mm_srai_epi16(ins_vec, 12);
ins_vec = _mm_slli_epi16(ins_vec, shift);
in += 4;
_mm_storeu_si128((__m128i*) ins, ins_vec);
for (int j = 0; j < 2; j++) {
__m128i tbl0_vec = _mm_loadu_si64((__m128i*) (tbl[0] + (j * 4)));
__m128i tbl1_vec = _mm_loadu_si64((__m128i*) (tbl[1] + (j * 4)));
m256i res;
res.lo = _mm_mullo_epi16(tbl0_vec, prev2_vec);
res.hi = _mm_mulhi_epi16(tbl0_vec, prev2_vec);
tbl0_vec = _mm_unpacklo_epi16(res.lo, res.hi);
res.lo = _mm_mullo_epi16(tbl1_vec, prev1_vec);
res.hi = _mm_mulhi_epi16(tbl1_vec, prev1_vec);
tbl1_vec = _mm_unpacklo_epi16(res.lo, res.hi);
__m128i acc_vec = _mm_add_epi32(tbl0_vec, tbl1_vec);
__m128i shift_ins = _mm_srai_epi32(j ? _mm_unpackhi_epi16(_mm_setzero_si128(), ins_vec)
: _mm_unpacklo_epi16(_mm_setzero_si128(), ins_vec),
acc_vec = _mm_add_epi32(acc_vec, shift_ins);
tbl1_vec = _mm_loadu_si128((__m128i*) tbl[1]);
if (j == 0) {
tbl1_vec = _mm_slli_si128(tbl1_vec, (1 - 0) * 8 + 2);
} else {
tbl1_vec = _mm_slli_si128(tbl1_vec, (1 - 1) * 8 + 2);
for (int k = 0; k < ((j + 1) * 4); k++) {
__m128i ins_vec2 = _mm_set1_epi16(ins[k]);
res.lo = _mm_mullo_epi16(tbl1_vec, ins_vec2);
res.hi = _mm_mulhi_epi16(tbl1_vec, ins_vec2);
__m128i mult = _mm_unpackhi_epi16(res.lo, res.hi);
acc_vec = _mm_add_epi32(acc_vec, mult);
tbl1_vec = _mm_slli_si128(tbl1_vec, 2);
acc_vec = _mm_srai_epi32(acc_vec, 11);
acc_vec = _mm_packs_epi32(acc_vec, _mm_setzero_si128());
_mm_storeu_si64((__m128*) out, acc_vec);
out += 4;
nbytes -= 16 * sizeof(int16_t);
memcpy(state, out - 16, 16 * sizeof(int16_t));
void aResampleImpl(uint8_t flags, uint16_t pitch, RESAMPLE_STATE state) {
int16_t tmp[16];
int16_t *in_initial = BUF_S16(;
@ -320,6 +505,171 @@ void aResampleImpl(uint8_t flags, uint16_t pitch, RESAMPLE_STATE state) {
memcpy(state + 8, in, 8 * sizeof(int16_t));
static const ALIGN_ASSET(16) int32_t x4000[4] = {
// Transposes, in place, the 4x4 matrix of 32-bit lanes formed by the four rows
// *r0..*r3. The rows are reinterpreted as float vectors only so that
// _mm_shuffle_ps can be used; no value conversion takes place.
// The trailing comments name the source elements (0..f, row-major) that end up
// in each vector.
static void mm128_transpose(__m128i* r0, __m128i* r1, __m128i* r2, __m128i* r3) {
__m128 tmp0, tmp1, tmp2, tmp3;
__m128 row0, row1, row2, row3;
row0 = _mm_castsi128_ps(*r0);
row1 = _mm_castsi128_ps(*r1);
row2 = _mm_castsi128_ps(*r2);
row3 = _mm_castsi128_ps(*r3);
// First pass: gather even / odd columns of each pair of rows.
tmp0 = _mm_shuffle_ps(row0, row1, _MM_SHUFFLE(2, 0, 2, 0)); // 0 2 4 6
tmp1 = _mm_shuffle_ps(row0, row1, _MM_SHUFFLE(3, 1, 3, 1)); // 1 3 5 7
tmp2 = _mm_shuffle_ps(row2, row3, _MM_SHUFFLE(2, 0, 2, 0)); // 8 a c e
tmp3 = _mm_shuffle_ps(row2, row3, _MM_SHUFFLE(3, 1, 3, 1)); // 9 b d f
// Second pass: combine the partial results into the transposed rows.
row0 = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(2, 0, 2, 0)); // 0 4 8 c
row1 = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(2, 0, 2, 0)); // 1 5 9 d
row2 = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(3, 1, 3, 1)); // 2 6 a e
row3 = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(3, 1, 3, 1)); // 3 7 b f
*r0 = _mm_castps_si128(row0);
*r1 = _mm_castps_si128(row1);
*r2 = _mm_castps_si128(row2);
*r3 = _mm_castps_si128(row3);
// Builds one 128-bit vector from two groups of four int16 values: the 64 bits
// read from `a` go into the upper half and the 64 bits read from `b` into the
// lower half. Each pointer must therefore reference at least four readable
// int16 values.
static __m128i move_two_4x16(int16_t* a, int16_t* b) {
return _mm_set_epi64(_mm_movepi64_pi64(_mm_loadl_epi64((__m128i*) a)),
_mm_movepi64_pi64(_mm_loadl_epi64((__m128i*) b)));
void aResampleImpl(uint8_t flags, uint16_t pitch, RESAMPLE_STATE state) {
int16_t tmp[32];
int16_t* in_initial = BUF_S16(;
int16_t* in = in_initial;
int16_t* out = BUF_S16(rspa.out);
int nbytes = ROUND_UP_16(rspa.nbytes);
uint32_t pitch_accumulator;
int i;
if (flags & A_INIT) {
memset(tmp, 0, 5 * sizeof(int16_t));
} else {
memcpy(tmp, state, 16 * sizeof(int16_t));
if (flags & 2) {
memcpy(in - 8, tmp + 8, 8 * sizeof(int16_t));
in -= tmp[5] / sizeof(int16_t);
in -= 4;
pitch_accumulator = (uint16_t) tmp[4];
memcpy(in, tmp, 4 * sizeof(int16_t));
__m128i x4000Vec = _mm_load_si128((__m128i*) x4000);
do {
for (i = 0; i < 2; i++) {
int16_t* tbl0 = resample_table[pitch_accumulator * 64 >> 16];
int16_t* in0 = in;
pitch_accumulator += (pitch << 1);
in += pitch_accumulator >> 16;
pitch_accumulator %= 0x10000;
int16_t* tbl1 = resample_table[pitch_accumulator * 64 >> 16];
int16_t* in1 = in;
pitch_accumulator += (pitch << 1);
in += pitch_accumulator >> 16;
pitch_accumulator %= 0x10000;
int16_t* tbl2 = resample_table[pitch_accumulator * 64 >> 16];
int16_t* in2 = in;
pitch_accumulator += (pitch << 1);
in += pitch_accumulator >> 16;
pitch_accumulator %= 0x10000;
int16_t* tbl3 = resample_table[pitch_accumulator * 64 >> 16];
int16_t* in3 = in;
pitch_accumulator += (pitch << 1);
in += pitch_accumulator >> 16;
pitch_accumulator %= 0x10000;
__m128i vec_in0 = move_two_4x16(in1, in0);
__m128i vec_tbl0 = move_two_4x16(tbl1, tbl0);
__m128i vec_in1 = move_two_4x16(in3, in2);
__m128i vec_tbl1 = move_two_4x16(tbl3, tbl2);
// we multiply in by tbl
m256i res;
res.lo = _mm_mullo_epi16(vec_in0, vec_tbl0);
res.hi = _mm_mulhi_epi16(vec_in0, vec_tbl0);
__m128i out0_vec = _mm_unpacklo_epi16(res.lo, res.hi);
__m128i out1_vec = _mm_unpackhi_epi16(res.lo, res.hi);
res.lo = _mm_mullo_epi16(vec_in1, vec_tbl1);
res.hi = _mm_mulhi_epi16(vec_in1, vec_tbl1);
__m128i out2_vec = _mm_unpacklo_epi16(res.lo, res.hi);
__m128i out3_vec = _mm_unpackhi_epi16(res.lo, res.hi);
// transpose to more easily make a sum at the end
mm128_transpose(&out0_vec, &out1_vec, &out2_vec, &out3_vec);
// add 0x4000
out0_vec = _mm_add_epi32(out0_vec, x4000Vec);
out1_vec = _mm_add_epi32(out1_vec, x4000Vec);
out2_vec = _mm_add_epi32(out2_vec, x4000Vec);
out3_vec = _mm_add_epi32(out3_vec, x4000Vec);
// shift by 15
out0_vec = _mm_srai_epi32(out0_vec, 15);
out1_vec = _mm_srai_epi32(out1_vec, 15);
out2_vec = _mm_srai_epi32(out2_vec, 15);
out3_vec = _mm_srai_epi32(out3_vec, 15);
// sum all to make sample
__m128i sample_vec = _mm_add_epi32(_mm_add_epi32(_mm_add_epi32(out0_vec, out1_vec), out2_vec), out3_vec);
// at the end we do this below but four times
// sample = ((in[0] * tbl[0] + 0x4000) >> 15) + ((in[1] * tbl[1] + 0x4000) >> 15) +
// ((in[2] * tbl[2] + 0x4000) >> 15) + ((in[3] * tbl[3] + 0x4000) >> 15);
sample_vec = _mm_packs_epi32(sample_vec, _mm_setzero_si128());
_mm_storeu_si64(out, sample_vec);
out += 4;
nbytes -= 8 * sizeof(int16_t);
} while (nbytes > 0);
state[4] = (int16_t) pitch_accumulator;
memcpy(state, in, 4 * sizeof(int16_t));
i = (in - in_initial + 4) & 7;
in -= i;
if (i != 0) {
i = -8 - i;
state[5] = i;
memcpy(state + 8, in, 8 * sizeof(int16_t));
void aEnvSetup1Impl(uint8_t initial_vol_wet, uint16_t rate_wet, uint16_t rate_left, uint16_t rate_right) {
rspa.vol_wet = (uint16_t)(initial_vol_wet << 8);
rspa.rate_wet = rate_wet;
@ -332,6 +682,8 @@ void aEnvSetup2Impl(uint16_t initial_vol_left, uint16_t initial_vol_right) {
rspa.vol[1] = initial_vol_right;
void aEnvMixerImpl(uint16_t in_addr, uint16_t n_samples, bool swap_reverb,
bool neg_3, bool neg_2,
bool neg_left, bool neg_right,
@ -368,6 +720,64 @@ void aEnvMixerImpl(uint16_t in_addr, uint16_t n_samples, bool swap_reverb,
} while (n > 0);
// SSE2 optimized version of algorithm
void aEnvMixerImpl(uint16_t in_addr, uint16_t n_samples, bool swap_reverb,
bool neg_3, bool neg_2,
bool neg_left, bool neg_right,
int32_t wet_dry_addr, u32 unk)
int16_t *in = BUF_S16(in_addr);
int16_t *dry[2] = {BUF_S16(((wet_dry_addr >> 24) & 0xFF) << 4), BUF_S16(((wet_dry_addr >> 16) & 0xFF) << 4)};
int16_t *wet[2] = {BUF_S16(((wet_dry_addr >> 8) & 0xFF) << 4), BUF_S16(((wet_dry_addr) & 0xFF) << 4)};
int16_t negs[4] = {neg_left ? -1 : 0, neg_right ? -1 : 0, neg_3 ? -4 : 0, neg_2 ? -2 : 0};
int n = ROUND_UP_16(n_samples);
const int n_aligned = n - (n % 8);
uint16_t vols[2] = {rspa.vol[0], rspa.vol[1]};
uint16_t rates[2] = {rspa.rate[0], rspa.rate[1]};
uint16_t vol_wet = rspa.vol_wet;
uint16_t rate_wet = rspa.rate_wet;
const __m128i* in_ptr = (__m128i*)in;
const __m128i* d_ptr[2] = { (__m128i*) dry[0], (__m128i*) dry[1] };
const __m128i* w_ptr[2] = { (__m128i*) wet[0], (__m128i*) wet[1] };
// Aligned loop
for (int N = 0; N < n_aligned; N+=8) {
// Init vectors
const __m128i in_channels = _mm_loadu_si128(in_ptr++);
__m128i d[2] = { _mm_loadu_si128(d_ptr[0]), _mm_loadu_si128(d_ptr[1]) };
__m128i w[2] = { _mm_loadu_si128(w_ptr[0]), _mm_loadu_si128(w_ptr[1]) };
// Compute base samples
// sample = ((in * vols) >> 16) ^ negs
__m128i s[2] = {
_mm_xor_si128(_mm_mulhi_epi16(in_channels, _mm_set1_epi16(vols[0])), _mm_set1_epi16(negs[0])),
_mm_xor_si128(_mm_mulhi_epi16(in_channels, _mm_set1_epi16(vols[1])), _mm_set1_epi16(negs[1]))
// Compute left swapped samples
// ((sample * vol_wet) >> 16) ^ negs
__m128i ss[2] = {
_mm_xor_si128(_mm_mulhi_epi16(s[swap_reverb], _mm_set1_epi16(vol_wet)), _mm_set1_epi16(negs[2])),
_mm_xor_si128(_mm_mulhi_epi16(s[!swap_reverb], _mm_set1_epi16(vol_wet)), _mm_set1_epi16(negs[3]))
// Store values to buffers
for (int j = 0; j < 2; j++) {
_mm_storeu_si128((__m128i*) d_ptr[j]++, _mm_adds_epi16(s[j], d[j]));
_mm_storeu_si128((__m128i*) w_ptr[j]++, _mm_adds_epi16(ss[j], w[j]));
vols[j] += rates[j];
vol_wet += rate_wet;
void aMixImpl(uint16_t count, int16_t gain, uint16_t in_addr, uint16_t out_addr) {
int nbytes = ROUND_UP_32(ROUND_DOWN_16(count << 4));
int16_t *in = BUF_S16(in_addr);
@ -395,6 +805,71 @@ void aMixImpl(uint16_t count, int16_t gain, uint16_t in_addr, uint16_t out_addr)
static const ALIGN_ASSET(16) int16_t x7fff[8] = {
0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
void aMixImpl(uint16_t count, int16_t gain, uint16_t in_addr, uint16_t out_addr) {
int nbytes = ROUND_UP_32(ROUND_DOWN_16(count << 4));
int16_t* in = BUF_S16(in_addr);
int16_t* out = BUF_S16(out_addr);
int i;
int32_t sample;
if (gain == -0x8000) {
while (nbytes > 0) {
for (unsigned int i = 0; i < 2; i++) {
__m128i outVec = _mm_loadu_si128((__m128i*) out);
__m128i inVec = _mm_loadu_si128((__m128i*) in);
__m128i subsVec = _mm_subs_epi16(outVec, inVec);
_mm_storeu_si128((__m128i*) out, subsVec);
nbytes -= 8 * sizeof(int16_t);
in += 8;
out += 8;
__m128i x7fffVec = _mm_load_si128((__m128i*) x7fff);
__m128i x4000Vec = _mm_load_si128((__m128i*) x4000);
__m128i gainVec = _mm_set1_epi16(gain);
while (nbytes > 0) {
for (i = 0; i < 2; i++) {
// Load input and output data into vectors
__m128i outVec = _mm_loadu_si128((__m128i*) out);
__m128i inVec = _mm_loadu_si128((__m128i*) in);
// Multiply `out` by `0x7FFF` producing 32 bit results, and store the upper and lower bits in each vector.
// Equivalent to `out[0..8] * 0x7FFF`
m256i outx7fff = m256i_mul_epi16(outVec, x7fffVec);
// Same as above but for in and gain. Equivalent to `in[0..8] * gain`
m256i inxGain = m256i_mul_epi16(inVec, gainVec);
in += 8;
// Now we have 4 32 bit elements. Continue the calculation per the reference implementation.
// We already did out * 0x7fff and in * gain.
// *out * 0x7fff + *in++ * gain is the final result of these two calculations.
m256i addVec = m256i_add_m256i_epi32(outx7fff, inxGain);
// Add 0x4000
addVec = m256i_add_m128i_epi32(addVec, x4000Vec);
// Shift over by 15
m256i shiftedVec = m256i_srai(addVec, 15);
// Convert each 32 bit element to 16 bit with saturation (clamp) and store in `outVec`
outVec = m256i_clamp_to_m128i(shiftedVec);
// Write the final vector back to memory
// The final calculation is ((out[0..8] * 0x7fff + in[0..8] * gain) + 0x4000) >> 15;
_mm_storeu_si128((__m128i*) out, outVec);
out += 8;
nbytes -= 16 * sizeof(int16_t);
void aS8DecImpl(uint8_t flags, ADPCM_STATE state) {
uint8_t *in = BUF_U8(;
int16_t *out = BUF_S16(rspa.out);

View File

@ -237,12 +237,16 @@ GameEngine::GameEngine() {
loader->RegisterResourceFactory(std::make_shared<SF64::ResourceFactoryBinarySampleV1>(), RESOURCE_FORMAT_BINARY,
"Sample", static_cast<uint32_t>(SF64::ResourceType::Sample), 1);
loader->RegisterResourceFactory(std::make_shared<SF64::ResourceFactoryBinarySampleV2>(), RESOURCE_FORMAT_BINARY,
"Sample", static_cast<uint32_t>(SF64::ResourceType::Sample), 2);
loader->RegisterResourceFactory(std::make_shared<SF64::ResourceFactoryXMLSampleV0>(), RESOURCE_FORMAT_XML,
"Sample", static_cast<uint32_t>(SF64::ResourceType::Sample), 0);
loader->RegisterResourceFactory(std::make_shared<SF64::ResourceFactoryBinarySoundFontV0>(), RESOURCE_FORMAT_BINARY,
"SoundFont", static_cast<uint32_t>(SF64::ResourceType::SoundFont), 0);
loader->RegisterResourceFactory(std::make_shared<SF64::ResourceFactoryXMLSoundFontV0>(), RESOURCE_FORMAT_XML,
"SoundFont", static_cast<uint32_t>(SF64::ResourceType::SoundFont), 0);
prevAltAssets = CVarGetInteger("gEnhancements.Mods.AlternateAssets", 0);
gEnableGammaBoost = CVarGetInteger("gGraphics.GammaMode", 0) == 0;

View File

@ -1,6 +1,14 @@
#include "SampleFactory.h"
#include "../ResourceUtil.h"
#include "port/resource/type/audio/Sample.h"
#include "sf64audio_provisional.h"
#include <dr_wav.h>
#include <dr_mp3.h>
#include "vorbis/vorbisfile.h"
namespace SF64 {
std::shared_ptr<Ship::IResource> ResourceFactoryBinarySampleV1::ReadResource(std::shared_ptr<Ship::File> file) {
@ -23,7 +31,7 @@ std::shared_ptr<Ship::IResource> ResourceFactoryBinarySampleV1::ReadResource(std
if(sample->mSample.codec == 2){
sample->mSample.medium = 2;
for(size_t i = 0; i < sample->mSample.size / 2; i++){
int16_t* sampleData = (int16_t*) sample->mSample.sampleAddr;
auto sampleData = (int16_t*) sample->mSample.sampleAddr;
sampleData[i] = BSWAP16(sampleData[i]);
} else {
@ -35,36 +43,236 @@ std::shared_ptr<Ship::IResource> ResourceFactoryBinarySampleV1::ReadResource(std
return sample;
std::shared_ptr<Ship::IResource> ResourceFactoryBinarySampleV2::ReadResource(std::shared_ptr<Ship::File> file) {
// Read callback handed to vorbisfile: copies up to `size * elems` bytes from the
// in-memory buffer described by `src` (an OggFileData) into `out`.
// The request is clamped to the bytes remaining after the current position,
// the position is advanced by the amount copied, and the number of whole
// elements copied is returned.
// NOTE(review): the final division assumes `size` is non-zero — confirm callers.
static size_t VorbisReadCallback(void* out, size_t size, size_t elems, void* src) {
OggFileData* data = static_cast<OggFileData*>(src);
size_t toRead = size * elems;
// Never read past the end of the buffer.
if (toRead > data->size - data->pos) {
toRead = data->size - data->pos;
memcpy(out, static_cast<uint8_t*>(data->data) + data->pos, toRead);
data->pos += toRead;
return toRead / size;
// Seek callback handed to vorbisfile for the in-memory buffer described by
// `src` (an OggFileData). The new position is derived from `pos` relative to
// the start (SEEK_SET), the current position (SEEK_CUR) or the end (SEEK_END).
// Returns 0 after storing the new position, or -1 when the seek is rejected
// (including when the computed position lies beyond `data->size`).
// NOTE(review): no `break`/`default` lines are visible in this view of the
// switch — confirm against the full file that each case terminates as intended.
static int VorbisSeekCallback(void* src, ogg_int64_t pos, int whence) {
OggFileData* data = static_cast<OggFileData*>(src);
size_t newPos;
switch (whence) {
case SEEK_SET:
newPos = pos;
case SEEK_CUR:
newPos = data->pos + pos;
case SEEK_END:
newPos = data->size + pos;
return -1;
// Positions past the end of the buffer are invalid.
if (newPos > data->size) {
return -1;
data->pos = newPos;
return 0;
// Close callback handed to vorbisfile. It does nothing with `src` and always
// reports success (0).
static int VorbisCloseCallback([[maybe_unused]] void* src) {
return 0;
// Tell callback handed to vorbisfile: reports the current read position stored
// in the OggFileData pointed to by `src`.
static long VorbisTellCallback(void* src) {
OggFileData* data = static_cast<OggFileData*>(src);
return data->pos;
static const ov_callbacks vorbisCallbacks = {
static void Mp3DecoderWorker(std::shared_ptr<Sample> sample, std::shared_ptr<Ship::File> sampleFile) {
drmp3 mp3;
drwav_uint64 numFrames;
drmp3_bool32 ret =
drmp3_init_memory(&mp3, sampleFile->Buffer->data(), sampleFile->Buffer->size(), nullptr);
numFrames = drmp3_get_pcm_frame_count(&mp3);
drwav_uint64 channels = mp3.channels;
drwav_uint64 sampleRate = mp3.sampleRate;
sample->mSample.tuning = (float)(sampleRate * channels) / 32000.0f;
sample->mSample.size = numFrames * channels * 2;
sample->mSample.sampleAddr = new uint8_t[sample->mSample.size];
drmp3_read_pcm_frames_s16(&mp3, numFrames, (int16_t*)sample->mSample.sampleAddr);
static void OggDecoderWorker(std::shared_ptr<Sample> sample, std::shared_ptr<Ship::File> sampleFile) {
OggVorbis_File vf;
char dataBuff[4096];
long read = 0;
size_t pos = 0;
OggFileData fileData = {
.data = sampleFile->Buffer.get()->data(),
.pos = 0,
.size = sampleFile->Buffer.get()->size(),
int ret = ov_open_callbacks(&fileData, &vf, nullptr, 0, vorbisCallbacks);
vorbis_info* vi = ov_info(&vf, -1);
uint64_t numFrames = ov_pcm_total(&vf, -1);
uint64_t sampleRate = vi->rate;
uint64_t numChannels = vi->channels;
int bitStream = 0;
size_t toRead = numFrames * numChannels * 2;
sample->mSample.sampleAddr = new uint8_t[toRead];
sample->mSample.tuning = (float)(sampleRate * numChannels) / 32000.0f;
do {
read = ov_read(&vf, dataBuff, 4096, 0, 2, 1, &bitStream);
memcpy(sample->mSample.sampleAddr + pos, dataBuff, read);
pos += read;
} while (read != 0);
// Builds a Sample resource from `file` and returns it, or nullptr when the file fails validation.
// NOTE(review): this excerpt is not contiguous. Closing braces and some statements are missing, and
// BinaryReader-based reads appear next to XML attribute reads. The comments below describe only the
// lines that are visible.
std::shared_ptr<Ship::IResource> ResourceFactoryXMLSampleV0::ReadResource(std::shared_ptr<Ship::File> file) {
// Reject files that do not match what this factory expects.
if (!FileHasValidFormatAndReader(file)) {
return nullptr;
auto sample = std::make_shared<Sample>(file->InitData);
// NOTE(review): the next two lines both extract from file->Reader but ask for different types;
// confirm that they belong to the same function in the real file.
auto reader = std::get<std::shared_ptr<Ship::BinaryReader>>(file->Reader);
auto child = std::get<std::shared_ptr<tinyxml2::XMLDocument>>(file->Reader)->FirstChildElement();
// Filled in further down and passed to LoadFile for the raw sample data.
std::shared_ptr<Ship::ResourceInitData> initData = std::make_shared<Ship::ResourceInitData>();
// When this attribute is present it selects the wav/ogg/mp3 decoding path near the end.
const char* customFormatStr = child->Attribute("CustomFormat");
memset(&sample->mSample, 0, sizeof(sample->mSample));
sample->mSample.isRelocated = 0;
// Header fields taken from XML attributes of the root element.
sample->mSample.codec = CodecStrToInt(child->Attribute("Codec"), file->InitData->Path.c_str());
sample->mSample.medium = MediumStrToInt(child->Attribute("Medium"));
sample->mSample.unk = child->IntAttribute("bit26");
// Header fields taken from the binary reader.
sample->mSample.codec = reader->ReadUByte();
sample->mSample.medium = reader->ReadUByte();
sample->mSample.unk = reader->ReadUByte();
sample->mSample.size = reader->ReadUInt32();
sample->mSample.tuning = reader->ReadFloat();
sample->mSample.loop = LoadChild<AdpcmLoopData*>(reader->ReadUInt64());
// NOTE(review): the assignment target on the next line is truncated in this excerpt.
sample-> = LoadChild<AdpcmBookData*>(reader->ReadUInt64());
sample->mSample.sampleAddr = new uint8_t[sample->mSample.size];
reader->Read((char*) sample->mSample.sampleAddr, sample->mSample.size);
// For codec 2 the medium is forced to 2 and every 16-bit word of the sample data is byte-swapped.
if(sample->mSample.codec == 2){
sample->mSample.medium = 2;
for(size_t i = 0; i < sample->mSample.size / 2; i++){
int16_t* sampleData = (int16_t*) sample->mSample.sampleAddr;
sampleData[i] = BSWAP16(sampleData[i]);
// Optional <ADPCMLoop> child: loop bounds plus predictor state values.
tinyxml2::XMLElement* loopRoot = child->FirstChildElement("ADPCMLoop");
if (loopRoot != nullptr) {
size_t i = 0;
sample->mSample.loop = new AdpcmLoopData();
sample->mSample.loop->start = loopRoot->UnsignedAttribute("Start");
sample->mSample.loop->end = loopRoot->UnsignedAttribute("End");
sample->mSample.loop->count = loopRoot->UnsignedAttribute("Count");
tinyxml2::XMLElement* predictor = loopRoot->FirstChildElement("Predictor");
// One "State" value is stored per sibling element, starting at the first <Predictor>.
// NOTE(review): predictorState is indexed without a visible bound check.
while (predictor != nullptr) {
sample->mSample.loop->predictorState[i++] = predictor->IntAttribute("State");
predictor = predictor->NextSiblingElement();
} else {
sample->mSample.medium = 0;
// Optional <ADPCMBook> child: predictor count, order and the book pages.
// NOTE(review): the member accesses in this section are truncated in this excerpt.
tinyxml2::XMLElement* bookRoot = child->FirstChildElement("ADPCMBook");
if (bookRoot != nullptr) {
size_t i = 0;
sample-> = new AdpcmBookData();
sample->>numPredictors = bookRoot->IntAttribute("Npredictors");
sample->>order = bookRoot->IntAttribute("Order");
tinyxml2::XMLElement* book = bookRoot->FirstChildElement("Book");
// The book array holds numPredictors * order * 8 entries.
size_t numBooks = sample->>numPredictors * sample->>order * 8;
sample->>book = new int16_t[numBooks];
// One "Page" value is stored per sibling element, starting at the first <Book>.
// NOTE(review): i is not compared against numBooks in the visible lines.
while (book != nullptr) {
sample->>book[i++] = book->IntAttribute("Page");
book = book->NextSiblingElement();
size_t size = child->Int64Attribute("Size");
sample->mSample.size = size;
// The "Path" attribute names the file that holds the raw sample bytes.
// NOTE(review): path is used without a null check in the visible lines.
const char* path = child->Attribute("Path");
initData->Path = path;
initData->IsCustom = false;
initData->ByteOrder = Ship::Endianness::Native;
auto sampleFile = Ship::Context::GetInstance()->GetResourceManager()->GetArchiveManager()->LoadFile(path, initData);
if (customFormatStr != nullptr) {
// Compressed files can take a really long time to decode (~250ms per).
// This worked when we tested it (09/04/2024) (Works on my machine)
// "wav" is decoded on the calling thread; "ogg" and "mp3" are handed to a worker thread.
if (strcmp(customFormatStr, "wav") == 0) {
drwav wav;
drwav_uint64 numFrames;
// NOTE(review): ret is not inspected in the visible lines.
drwav_bool32 ret =
drwav_init_memory(&wav, sampleFile->Buffer->data(), sampleFile->Buffer->size(), nullptr);
drwav_get_length_in_pcm_frames(&wav, &numFrames);
sample->mSample.tuning = (float)(wav.sampleRate * wav.channels) / 32000.0f;
// Two bytes per 16-bit sample, for every channel of every frame.
sample->mSample.size = numFrames * wav.channels * 2;
sample->mSample.sampleAddr = new uint8_t[sample->mSample.size];
drwav_read_pcm_frames_s16(&wav, numFrames, (int16_t*)sample->mSample.sampleAddr);
return sample;
} else if (strcmp(customFormatStr, "ogg") == 0) {
// NOTE(review): no detach()/join() on this thread is visible before the return; confirm one
// exists. The sample is returned right after the thread is created, so the worker may still
// be writing to sample->mSample at that point.
std::thread fileDecoderThread = std::thread(OggDecoderWorker, sample, sampleFile);
return sample;
} else if (strcmp(customFormatStr, "mp3") == 0) {
// NOTE(review): same thread lifetime question as the "ogg" branch above.
std::thread fileDecoderThread = std::thread(Mp3DecoderWorker, sample, sampleFile);
return sample;
// Not a normal streamed sample. Fallback to the original ADPCM sample to be decoded by the audio engine.
sample->mSample.sampleAddr = new uint8_t[size];
// Can't use memcpy due to endianness issues.
// NOTE(review): size comes from the "Size" attribute and is not compared with the buffer length here.
for (uint32_t i = 0; i < size; i++) {
sample->mSample.sampleAddr[i] = (*sampleFile->Buffer)[i];
sample->mSample.isRelocated = 1;
return sample;
// Maps a codec name from the sample XML to its numeric codec constant.
// `str` is the codec name; `file` is only used in the error message.
// Throws std::runtime_error when the name matches none of the branches.
// NOTE(review): several return statements, and the last snprintf argument, are not visible in this
// excerpt; only the S8 and S16 results can be read from here.
// NOTE(review): str is passed to strcmp without a null check.
uint8_t ResourceFactoryXMLSampleV0::CodecStrToInt(const char* str, const char* file) {
if (strcmp("ADPCM", str) == 0) {
} else if (strcmp("S8", str) == 0) {
return CODEC_S8;
} else if (strcmp("S16MEM", str) == 0) {
} else if (strcmp("ADPCMSMALL", str) == 0) {
} else if (strcmp("REVERB", str) == 0) {
} else if (strcmp("S16", str) == 0) {
return CODEC_S16;
} else {
// Build the message in a fixed-size buffer; snprintf truncates anything longer.
char buff[2048];
snprintf(buff, 2048,
"Invalid codec in %s. Got %s, expected ADPCM, S8, S16MEM, ADPCMSMALL, REVERB, S16, UNK6, UNK7.", file,
throw std::runtime_error(buff);
/**
 * Maps a medium name from the sample XML to its numeric value.
 *
 * Accepted names and results: "Ram" -> 0, "Unk" -> 1, "Cart" -> 2, "Disk" -> 3,
 * "RamUnloaded" -> 5 (4 is skipped). Matching is case-sensitive.
 *
 * @param str Medium name; may be nullptr (for example when the XML attribute is absent).
 * @return The numeric medium value.
 * @throws std::runtime_error if str is nullptr or is not one of the accepted names.
 */
uint32_t ResourceFactoryXMLSampleV0::MediumStrToInt(const char* str) {
    struct MediumName {
        const char* name;
        uint32_t value;
    };
    // 4 is skipped
    static constexpr MediumName kMediums[] = {
        { "Ram", 0 }, { "Unk", 1 }, { "Cart", 2 }, { "Disk", 3 }, { "RamUnloaded", 5 },
    };

    // A null name must never reach strcmp; it falls through to the error path instead.
    if (str != nullptr) {
        for (const MediumName& medium : kMediums) {
            if (strcmp(medium.name, str) == 0) {
                return medium.value;
            }
        }
    }

    char buff[2048];
    snprintf(buff, sizeof(buff), "Bad medium value. Got %s, expected Ram, Unk, Cart, Disk, or RamUnloaded.",
             str != nullptr ? str : "(null)");
    throw std::runtime_error(buff);
}
} // namespace SF64

View File

@ -1,16 +1,26 @@
#pragma once
#include "Resource.h"
#include "ResourceFactoryXML.h"
#include "ResourceFactoryBinary.h"
// Sample resource factories.
// NOTE(review): access specifiers and closing braces of the declarations below are not visible in this excerpt.
namespace SF64 {
// Read cursor over an in-memory Ogg file: base pointer, current offset and total size.
struct OggFileData {
void* data;
size_t pos;
size_t size;
// Factory deriving from Ship::ResourceFactoryBinary (sample format V1).
class ResourceFactoryBinarySampleV1 : public Ship::ResourceFactoryBinary {
std::shared_ptr<Ship::IResource> ReadResource(std::shared_ptr<Ship::File> file) override;
// Factory deriving from Ship::ResourceFactoryBinary (sample format V2); its members are not visible here.
class ResourceFactoryBinarySampleV2 : public Ship::ResourceFactoryBinary {
// Factory deriving from Ship::ResourceFactoryXML that reads samples described in XML.
class ResourceFactoryXMLSampleV0 : public Ship::ResourceFactoryXML {
std::shared_ptr<Ship::IResource> ReadResource(std::shared_ptr<Ship::File> file) override;
// Converts a codec name to its numeric value; `file` is only used in the error message.
static uint8_t CodecStrToInt(const char* str, const char* file);
// Converts a medium name to its numeric value.
static uint32_t MediumStrToInt(const char* str);
}; // namespace SF64

View File

@ -1,5 +1,7 @@
#include "SoundFontFactory.h"
#include "../ResourceUtil.h"
#include "utils/StringHelper.h"
#include <sf64audio_provisional.h>
#include "port/resource/type/audio/SoundFont.h"
namespace SF64 {
@ -29,4 +31,214 @@ std::shared_ptr<Ship::IResource> ResourceFactoryBinarySoundFontV0::ReadResource(
return font;
// Maps a medium name from the sound font XML to its MEDIUM_* constant.
// Throws std::runtime_error when the name matches none of the branches.
// NOTE(review): the return statements for "Cart", "Disk" and any later branch are not visible in this excerpt.
// NOTE(review): str is passed to strcmp without a null check.
int8_t ResourceFactoryXMLSoundFontV0::MediumStrToInt(const char* str) {
if (!strcmp("Ram", str)) {
return MEDIUM_RAM;
} else if (!strcmp("Unk", str)) {
return MEDIUM_UNK;
} else if (!strcmp("Cart", str)) {
} else if (!strcmp("Disk", str)) {
// 4 is skipped
} else {
throw std::runtime_error(
StringHelper::Sprintf("Bad medium value. Got %s, expected Ram, Unk, Cart, or Disk.", str));
// Maps a cache policy name ("Temporary", "Persistent", "Either", "Permanent") to a numeric value.
// Throws std::runtime_error when the name matches none of the branches.
// NOTE(review): the value returned by each branch is not visible in this excerpt.
// NOTE(review): str is passed to strcmp without a null check.
int8_t ResourceFactoryXMLSoundFontV0::CachePolicyToInt(const char* str) {
if (!strcmp("Temporary", str)) {
} else if (!strcmp("Persistent", str)) {
} else if (!strcmp("Either", str)) {
} else if (!strcmp("Permanent", str)) {
} else {
throw std::runtime_error(StringHelper::Sprintf(
"Bad cache policy value. Got %s, expected Temporary, Persistent, Either, or Permanent.", str));
// Reads the children of the given XML element into DrumData objects and publishes the drum list
// and count on soundFont->mFont.
// NOTE(review): closing braces and some statements (for example where each drum is stored in
// soundFont->mDrums) are not visible in this excerpt.
void ResourceFactoryXMLSoundFontV0::ParseDrums(SoundFont* soundFont, tinyxml2::XMLElement* element) {
element = (tinyxml2::XMLElement*)element->FirstChildElement();
// No drums
if (element == nullptr) {
soundFont->mFont.drums = nullptr;
soundFont->mFont.numDrums = 0;
// One iteration per sibling element.
do {
auto drum = new DrumData;
std::vector<EnvelopePointData> envelopes;
drum->adsrDecayIndex = element->IntAttribute("ReleaseRate");
drum->pan = element->IntAttribute("Pan");
drum->isRelocated = element->IntAttribute("Loaded");
drum->tunedSample.tuning = element->FloatAttribute("Tuning");
// A non-empty SampleRef is resolved through the resource manager; otherwise the drum has no sample.
const char* sampleStr = element->Attribute("SampleRef");
if (sampleStr != nullptr && sampleStr[0] != 0) {
auto res = Ship::Context::GetInstance()->GetResourceManager()->LoadResourceProcess(sampleStr);
drum->tunedSample.sample = static_cast<SampleData*>(res ? res->GetRawPointer() : nullptr);
} else {
drum->tunedSample.sample = nullptr;
// Step down into the drum's first child to look for its <Envelopes> element.
// NOTE(review): the result is dereferenced on the next line without a visible null check.
element = (tinyxml2::XMLElement*)element->FirstChildElement();
if (!strcmp(element->Name(), "Envelopes")) {
// element = (tinyxml2::XMLElement*)element->FirstChildElement();
unsigned int envCount = 0;
envelopes = ParseEnvelopes(soundFont, element, &envCount);
// Step back up to the drum element before moving to its next sibling.
element = (tinyxml2::XMLElement*)element->Parent();
drum->envelope = new EnvelopePointData[envelopes.size()];
// NOTE(review): the source argument of this memcpy is missing in this excerpt.
memcpy(drum->envelope,, envelopes.size() * sizeof(EnvelopePointData));
} else {
drum->envelope = nullptr;
if (drum->tunedSample.sample == nullptr) {
} else {
element = element->NextSiblingElement();
} while (element != nullptr);
soundFont->mFont.numDrums = soundFont->mDrums.size();
// NOTE(review): the right-hand side of this assignment is truncated in this excerpt.
soundFont->mFont.drums = soundFont->;
// Reads the children of the given XML element into InstrumentData objects and publishes the
// instrument list and count on soundFont->mFont.
// NOTE(review): closing braces and some statements (for example where each instrument is stored in
// soundFont->mInstruments, and how isValid is used) are not visible in this excerpt.
void ResourceFactoryXMLSoundFontV0::ParseInstruments(SoundFont* soundFont, tinyxml2::XMLElement* element) {
// NOTE(review): the first child is used by the loop below without a visible null check.
element = element->FirstChildElement();
// One iteration per sibling element.
do {
auto instrument = new InstrumentData;
unsigned int envCount = 0;
std::vector<EnvelopePointData> envelopes;
int isValid = element->BoolAttribute("IsValid");
instrument->isRelocated = element->IntAttribute("Loaded");
instrument->normalRangeLo = element->IntAttribute("NormalRangeLo");
instrument->normalRangeHi = element->IntAttribute("NormalRangeHi");
instrument->adsrDecayIndex = element->IntAttribute("ReleaseRate");
// Children are consumed in a fixed order: Envelopes, LowNotesSound, NormalNotesSound, HighNotesSound.
tinyxml2::XMLElement* instrumentElement = element->FirstChildElement();
// Remember the first child so the walk can get back to the instrument element afterwards.
tinyxml2::XMLElement* instrumentElementCopy = instrumentElement;
if (instrumentElement != nullptr && !strcmp(instrumentElement->Name(), "Envelopes")) {
envelopes = ParseEnvelopes(soundFont, instrumentElement, &envCount);
instrument->envelope = new EnvelopePointData[envelopes.size()];
// NOTE(review): the source argument of this memcpy is missing in this excerpt.
memcpy(instrument->envelope,, envelopes.size() * sizeof(EnvelopePointData));
instrumentElement = instrumentElement->NextSiblingElement();
// Low-pitch sample: a non-zero tuning stored on the loaded sample replaces the XML "Tuning" value.
if (instrumentElement != nullptr && !strcmp("LowNotesSound", instrumentElement->Name())) {
instrument->lowPitchTunedSample.tuning = instrumentElement->FloatAttribute("Tuning");
const char* sampleStr = instrumentElement->Attribute("SampleRef");
if (sampleStr != nullptr && sampleStr[0] != 0) {
auto res = static_pointer_cast<Sample>(
Ship::Context::GetInstance()->GetResourceManager()->LoadResourceProcess(sampleStr, true));
auto sample = static_cast<SampleData*>(res ? res->GetRawPointer() : nullptr);
instrument->lowPitchTunedSample.sample = sample;
if (sample != nullptr && sample->tuning != 0.0f) {
instrument->lowPitchTunedSample.tuning = sample->tuning;
instrumentElement = instrumentElement->NextSiblingElement();
// Normal-pitch sample: same handling as the low-pitch sample.
if (instrumentElement != nullptr && !strcmp("NormalNotesSound", instrumentElement->Name())) {
instrument->normalPitchTunedSample.tuning = instrumentElement->FloatAttribute("Tuning");
const char* sampleStr = instrumentElement->Attribute("SampleRef");
if (sampleStr != nullptr && sampleStr[0] != 0) {
auto res = static_pointer_cast<Sample>(
Ship::Context::GetInstance()->GetResourceManager()->LoadResourceProcess(sampleStr, true));
auto sample = static_cast<SampleData*>(res ? res->GetRawPointer() : nullptr);
instrument->normalPitchTunedSample.sample = sample;
if (sample != nullptr && sample->tuning != 0.0f) {
instrument->normalPitchTunedSample.tuning = sample->tuning;
instrumentElement = instrumentElement->NextSiblingElement();
// High-pitch sample: same handling as the low-pitch sample.
if (instrumentElement != nullptr && !strcmp("HighNotesSound", instrumentElement->Name())) {
instrument->highPitchTunedSample.tuning = instrumentElement->FloatAttribute("Tuning");
const char* sampleStr = instrumentElement->Attribute("SampleRef");
if (sampleStr != nullptr && sampleStr[0] != 0) {
auto res = static_pointer_cast<Sample>(
Ship::Context::GetInstance()->GetResourceManager()->LoadResourceProcess(sampleStr, true));
auto sample = static_cast<SampleData*>(res ? res->GetRawPointer() : nullptr);
instrument->highPitchTunedSample.sample = sample;
if (sample != nullptr && sample->tuning != 0.0f) {
instrument->highPitchTunedSample.tuning = sample->tuning;
instrumentElement = instrumentElement->NextSiblingElement();
// Go back to the instrument element through its first child, then advance to the next instrument.
// NOTE(review): no null check on instrumentElementCopy is visible before Parent() is called.
element = instrumentElementCopy;
element = (tinyxml2::XMLElement*)element->Parent();
element = element->NextSiblingElement();
} while (element != nullptr);
// NOTE(review): the right-hand side of this assignment is truncated in this excerpt.
soundFont->mFont.instruments = soundFont->;
soundFont->mFont.numInstruments = soundFont->mInstruments.size();
// Walks the <Envelope> children of `element`, building an EnvelopePointData from the "Delay" and
// "Arg" attributes of each, and returns the resulting vector. *count receives `total`.
// NOTE(review): the lines that append each point to `envelopes` and update `total` are not visible
// in this excerpt.
std::vector<EnvelopePointData> ResourceFactoryXMLSoundFontV0::ParseEnvelopes(SoundFont* soundFont,
tinyxml2::XMLElement* element,
unsigned int* count) {
std::vector<EnvelopePointData> envelopes;
unsigned int total = 0;
// Only children named "Envelope" are visited.
element = element->FirstChildElement("Envelope");
while (element != nullptr) {
// Both attributes are read as int and cast to s16.
EnvelopePointData env = {
.delay = (s16)element->IntAttribute("Delay"),
.arg = (s16)element->IntAttribute("Arg"),
element = element->NextSiblingElement("Envelope");
*count = total;
return envelopes;
// Builds a SoundFont resource from an XML document: header values come from the root element's
// attributes, then the "Drums" and "Instruments" children are parsed.
// Returns nullptr when the file fails validation.
// NOTE(review): closing braces are not visible in this excerpt.
std::shared_ptr<Ship::IResource> ResourceFactoryXMLSoundFontV0::ReadResource(std::shared_ptr<Ship::File> file) {
if (!FileHasValidFormatAndReader(file)) {
return nullptr;
auto audioSoundFont = std::make_shared<SoundFont>(file->InitData);
// NOTE(review): the root element is used below without a visible null check.
auto child = std::get<std::shared_ptr<tinyxml2::XMLDocument>>(file->Reader)->FirstChildElement();
// Header data
memset(&audioSoundFont->mFont, 0, sizeof(audioSoundFont->mFont));
auto shortData1 = child->IntAttribute("Data1");
auto shortData2 = child->IntAttribute("Data2");
// Data2 packs the instrument count in bits 8-15 and the drum count in bits 0-7.
// ParseDrums and ParseInstruments assign these two counts again from the parsed lists.
audioSoundFont->mFont.numInstruments = (shortData2 >> 8) & 0xFFu;
audioSoundFont->mFont.numDrums = shortData2 & 0xFFu;
// Data1 packs sampleBankId1 in bits 8-15 and sampleBankId2 in bits 0-7.
audioSoundFont->mFont.sampleBankId1 = (shortData1 >> 8) & 0xFFu;
audioSoundFont->mFont.sampleBankId2 = shortData1 & 0xFFu;
// Dispatch each child of the root element by its tag name.
child = (tinyxml2::XMLElement*)child->FirstChildElement();
while (child != nullptr) {
const char* name = child->Name();
if (!strcmp(name, "Drums")) {
ParseDrums(audioSoundFont.get(), child);
} else if (!strcmp(name, "Instruments")) {
ParseInstruments(audioSoundFont.get(), child);
child = child->NextSiblingElement();
return audioSoundFont;
} // namespace SF64

View File

@ -1,11 +1,26 @@
#pragma once
#include "Resource.h"
#include "ResourceFactoryXML.h"
#include "ResourceFactoryBinary.h"
#include "port/resource/type/audio/SoundFont.h"
// Sound font resource factories.
// NOTE(review): access specifiers and closing braces of the declarations below are not visible in this excerpt.
namespace SF64 {
// Factory deriving from Ship::ResourceFactoryBinary for sound fonts.
class ResourceFactoryBinarySoundFontV0 : public Ship::ResourceFactoryBinary {
std::shared_ptr<Ship::IResource> ReadResource(std::shared_ptr<Ship::File> file) override;
// Factory deriving from Ship::ResourceFactoryXML that reads sound fonts described in XML.
class ResourceFactoryXMLSoundFontV0 : public Ship::ResourceFactoryXML {
std::shared_ptr<Ship::IResource> ReadResource(std::shared_ptr<Ship::File> file) override;
// Converts a medium name to its numeric value.
static int8_t MediumStrToInt(const char* str);
// Converts a cache policy name to its numeric value.
static int8_t CachePolicyToInt(const char* str);
// Parses the children of `element` as drums into soundFont.
void ParseDrums(SoundFont* soundFont, tinyxml2::XMLElement* element);
// Parses the children of `element` as instruments into soundFont.
void ParseInstruments(SoundFont* soundFont, tinyxml2::XMLElement* element);
// Parses the <Envelope> children of `element`; `count` is an output parameter.
std::vector<EnvelopePointData> ParseEnvelopes(SoundFont* soundFont, tinyxml2::XMLElement* element,
unsigned int* count);
}; // namespace SF64

@ -1 +1 @@
Subproject commit 28dcd128b0406a43ab7ef9718213f7ab7d3736f8
Subproject commit 27af72331ceba7703f0382bb0316320baed377a3