From 5f0bd6e9f052821ab8d0d8394b9df4467176b6a0 Mon Sep 17 00:00:00 2001 From: KiritoDv Date: Sun, 2 Feb 2025 23:34:11 -0600 Subject: [PATCH 1/6] First implementation of louist system --- .github/workflows/linux.yml | 2 +- .github/workflows/mac.yml | 2 +- .github/workflows/main.yml | 4 +- CMakeLists.txt | 26 +- assets/yaml/cn/rev0/ast_audio.yaml | 31 +++ cmake/modules/FindOgg.cmake | 61 +++++ cmake/modules/FindVorbis.cmake | 197 ++++++++++++++ docs/BUILDING.md | 18 +- src/audio/audio_synthesis.c | 3 +- src/port/Engine.cpp | 5 +- .../importers/audio/SampleFactory.cpp | 244 ++++++++++++++++-- .../resource/importers/audio/SampleFactory.h | 12 +- tools/Torch | 2 +- 13 files changed, 567 insertions(+), 40 deletions(-) create mode 100644 assets/yaml/cn/rev0/ast_audio.yaml create mode 100644 cmake/modules/FindOgg.cmake create mode 100644 cmake/modules/FindVorbis.cmake diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 251ddcc8..c0b68d29 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -14,7 +14,7 @@ jobs: - name: Update machine run: sudo apt update - name: Install dependencies - run: sudo apt-get install gcc g++ git cmake ninja-build lsb-release libsdl2-dev libpng-dev libsdl2-net-dev libzip-dev zipcmp zipmerge ziptool nlohmann-json3-dev libtinyxml2-dev libspdlog-dev libboost-dev libopengl-dev + run: sudo apt-get install gcc g++ git cmake ninja-build lsb-release libsdl2-dev libpng-dev libsdl2-net-dev libzip-dev zipcmp zipmerge ziptool nlohmann-json3-dev libtinyxml2-dev libspdlog-dev libboost-dev libopengl-dev libogg-dev libvorbis-dev - name: Install latest SDL run: | export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 2cc8c768..f70c52e2 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -12,7 +12,7 @@ jobs: with: submodules: recursive - name: Install dependencies - run: brew install sdl2 libpng glew ninja cmake 
libzip nlohmann-json tinyxml2 spdlog + run: brew install sdl2 libpng glew ninja cmake libzip nlohmann-json tinyxml2 spdlog vorbis-tools - name: Build run: | cmake -H. -Bbuild-cmake -GNinja -DCMAKE_BUILD_TYPE=Release diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 160e17e2..54ac983f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -81,7 +81,7 @@ jobs: with: submodules: recursive - name: Install dependencies - run: brew install sdl2 libpng glew ninja cmake libzip nlohmann-json tinyxml2 spdlog + run: brew install sdl2 libpng glew ninja cmake libzip nlohmann-json tinyxml2 spdlog vorbis-tools - name: Build run: | cmake -H. -Bbuild-cmake -GNinja -DCMAKE_BUILD_TYPE=Release @@ -115,7 +115,7 @@ jobs: - name: Update machine run: sudo apt update - name: Install dependencies - run: sudo apt-get install gcc g++ git cmake ninja-build lsb-release libsdl2-dev libpng-dev libsdl2-net-dev libzip-dev zipcmp zipmerge ziptool nlohmann-json3-dev libtinyxml2-dev libspdlog-dev libboost-dev libopengl-dev + run: sudo apt-get install gcc g++ git cmake ninja-build lsb-release libsdl2-dev libpng-dev libsdl2-net-dev libzip-dev zipcmp zipmerge ziptool nlohmann-json3-dev libtinyxml2-dev libspdlog-dev libboost-dev libopengl-dev libogg-dev libvorbis-dev - name: ccache uses: hendrikmuhs/ccache-action@v1.2.14 with: diff --git a/CMakeLists.txt b/CMakeLists.txt index 557a23c4..ffe5a58f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.16.0 FATAL_ERROR) # Set the project version and language project(Starship VERSION 0.1.0 LANGUAGES C CXX ASM) +include(FetchContent) if(APPLE) enable_language(OBJCXX) @@ -26,7 +27,7 @@ include(cmake/automate-vcpkg.cmake) set(VCPKG_TRIPLET x64-windows-static) set(VCPKG_TARGET_TRIPLET x64-windows-static) vcpkg_bootstrap() -vcpkg_install_packages(zlib bzip2 libzip libpng sdl2 glew glfw3 nlohmann-json tinyxml2 spdlog) +vcpkg_install_packages(zlib bzip2 libzip libpng sdl2 glew 
glfw3 nlohmann-json tinyxml2 spdlog libogg libvorbis) set_property(DIRECTORY ${CMAKE_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME}) set_property(DIRECTORY ${CMAKE_SOURCE_DIR} PROPERTY VS_DEBUGGER_WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}) @@ -188,6 +189,13 @@ if (MSVC) endif() endif() +FetchContent_Declare( + dr_libs + GIT_REPOSITORY https://github.com/mackron/dr_libs.git + GIT_TAG da35f9d6c7374a95353fd1df1d394d44ab66cf01 +) +FetchContent_MakeAvailable(dr_libs) + #==============================================================================# # Libultraship Integration # #==============================================================================# @@ -224,6 +232,7 @@ include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/libultraship/src/graphic ${SDL2_INCLUDE_DIRS} ${GLEW_INCLUDE_DIRS} + ${dr_libs_SOURCE_DIR} ) add_subdirectory(libultraship ${CMAKE_CURRENT_SOURCE_DIR}/libultraship) @@ -280,8 +289,17 @@ endif() if (CMAKE_SYSTEM_NAME STREQUAL "Windows") + find_package(Ogg CONFIG REQUIRED) + link_libraries(Ogg::ogg) + + find_package(Vorbis CONFIG REQUIRED) + link_libraries(Vorbis::vorbisfile) set(ADDITIONAL_LIBRARY_DEPENDENCIES "$<$:SDL2_net::SDL2_net-static>" + "Ogg::ogg" + "Vorbis::vorbis" + "Vorbis::vorbisenc" + "Vorbis::vorbisfile" ) elseif(CMAKE_SYSTEM_NAME STREQUAL "NintendoSwitch") set(ADDITIONAL_LIBRARY_DEPENDENCIES @@ -295,8 +313,14 @@ elseif(CMAKE_SYSTEM_NAME STREQUAL "CafeOS") ${DEVKITPRO}/portlibs/wiiu/include/ ) else() + find_package(Ogg REQUIRED) + find_package(Vorbis REQUIRED) set(ADDITIONAL_LIBRARY_DEPENDENCIES "$<$:SDL2_net::SDL2_net>" + "Ogg::ogg" + "Vorbis::vorbis" + "Vorbis::vorbisenc" + "Vorbis::vorbisfile" ) endif() diff --git a/assets/yaml/cn/rev0/ast_audio.yaml b/assets/yaml/cn/rev0/ast_audio.yaml new file mode 100644 index 00000000..e6bb775a --- /dev/null +++ b/assets/yaml/cn/rev0/ast_audio.yaml @@ -0,0 +1,31 @@ +:config: + force: true + header: + code: + - '#include "sys.h"' + - '#include "sf64audio_provisional.h"' + +audio_setup: + type: 
NAUDIO:V1:AUDIO_SETUP + driver: SF64 + audio_seq: + size: 0x3AFD0 + offset: 0xE9950 + audio_bank: + size: 0x1CB20 + offset: 0x1183A0 + audio_table: + size: 0x691AF0 + offset: 0x134EC0 + +audio_sample_bank_table: + { type: NAUDIO:V1:AUDIO_TABLE, format: SAMPLE, offset: 0xC1460, symbol: gSampleBankTableInit } + +audio_seq_table: + { type: NAUDIO:V1:AUDIO_TABLE, format: SEQUENCE, offset: 0xC14A0, symbol: gSeqTableInit } + +audio_soundfont_table: + { type: NAUDIO:V1:AUDIO_TABLE, format: SOUNDFONT, offset: 0xC18D0, symbol: gSoundFontTableInit } + +audio_seq_font_table: + { type: ARRAY, count: 283, array_type: u8, offset: 0xC1AF0, symbol: gSeqFontTableInit } \ No newline at end of file diff --git a/cmake/modules/FindOgg.cmake b/cmake/modules/FindOgg.cmake new file mode 100644 index 00000000..f606144f --- /dev/null +++ b/cmake/modules/FindOgg.cmake @@ -0,0 +1,61 @@ +# - Find ogg +# Find the native ogg includes and libraries +# +# OGG_INCLUDE_DIRS - where to find ogg.h, etc. +# OGG_LIBRARIES - List of libraries when using ogg. +# OGG_FOUND - True if ogg found. + +if (OGG_INCLUDE_DIR) + # Already in cache, be silent + set(OGG_FIND_QUIETLY TRUE) +endif () + +find_package (PkgConfig QUIET) +pkg_check_modules (PC_OGG QUIET ogg>=1.3.0) + +set (OGG_VERSION ${PC_OGG_VERSION}) + +find_path (OGG_INCLUDE_DIR ogg/ogg.h + HINTS + ${PC_OGG_INCLUDEDIR} + ${PC_OGG_INCLUDE_DIRS} + ${OGG_ROOT} + ) +# MSVC built ogg may be named ogg_static. +# The provided project files name the library with the lib prefix. +find_library (OGG_LIBRARY + NAMES + ogg + ogg_static + libogg + libogg_static + HINTS + ${PC_OGG_LIBDIR} + ${PC_OGG_LIBRARY_DIRS} + ${OGG_ROOT} + ) +# Handle the QUIETLY and REQUIRED arguments and set OGG_FOUND +# to TRUE if all listed variables are TRUE. 
+include (FindPackageHandleStandardArgs) +find_package_handle_standard_args (Ogg + REQUIRED_VARS + OGG_LIBRARY + OGG_INCLUDE_DIR + VERSION_VAR + OGG_VERSION + ) + +if (OGG_FOUND) + set (OGG_LIBRARIES ${OGG_LIBRARY}) + set (OGG_INCLUDE_DIRS ${OGG_INCLUDE_DIR}) + + if(NOT TARGET Ogg::ogg) + add_library(Ogg::ogg UNKNOWN IMPORTED) + set_target_properties(Ogg::ogg PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${OGG_INCLUDE_DIRS}" + IMPORTED_LOCATION "${OGG_LIBRARIES}" + ) + endif () +endif () + +mark_as_advanced (OGG_INCLUDE_DIR OGG_LIBRARY) \ No newline at end of file diff --git a/cmake/modules/FindVorbis.cmake b/cmake/modules/FindVorbis.cmake new file mode 100644 index 00000000..0d3d6624 --- /dev/null +++ b/cmake/modules/FindVorbis.cmake @@ -0,0 +1,197 @@ +#[=======================================================================[.rst: +FindVorbis +---------- +Finds the native vorbis, vorbisenc and vorbisfile includes and libraries. +Imported Targets +^^^^^^^^^^^^^^^^ +This module provides the following imported targets, if found: +``Vorbis::vorbis`` + The Vorbis library +``Vorbis::vorbisenc`` + The VorbisEnc library +``Vorbis::vorbisfile`` + The VorbisFile library +Result Variables +^^^^^^^^^^^^^^^^ +This will define the following variables: +``Vorbis_Vorbis_INCLUDE_DIRS`` + List of include directories when using vorbis. +``Vorbis_Enc_INCLUDE_DIRS`` + List of include directories when using vorbisenc. +``Vorbis_File_INCLUDE_DIRS`` + List of include directories when using vorbisfile. +``Vorbis_Vorbis_LIBRARIES`` + List of libraries when using vorbis. +``Vorbis_Enc_LIBRARIES`` + List of libraries when using vorbisenc. +``Vorbis_File_LIBRARIES`` + List of libraries when using vorbisfile. +``Vorbis_FOUND`` + True if vorbis and requested components found. +``Vorbis_Vorbis_FOUND`` + True if vorbis found. +``Vorbis_Enc_FOUND`` + True if vorbisenc found. +``Vorbis_File_FOUND`` + True if vorbisfile found. 
+Cache variables +^^^^^^^^^^^^^^^ +The following cache variables may also be set: +``Vorbis_Vorbis_INCLUDE_DIR`` + The directory containing ``vorbis/codec.h``. +``Vorbis_Enc_INCLUDE_DIR`` + The directory containing ``vorbis/vorbisenc.h``. +``Vorbis_File_INCLUDE_DIR`` + The directory containing ``vorbis/vorbisfile.h``. +``Vorbis_Vorbis_LIBRARY`` + The path to the vorbis library. +``Vorbis_Enc_LIBRARY`` + The path to the vorbisenc library. +``Vorbis_File_LIBRARY`` + The path to the vorbisfile library. +Hints +^^^^^ +A user may set ``Vorbis_ROOT`` to a vorbis installation root to tell this module where to look. +#]=======================================================================] + +if (Vorbis_Vorbis_INCLUDE_DIR) + # Already in cache, be silent + set (Vorbis_FIND_QUIETLY TRUE) +endif () + +set (Vorbis_Vorbis_FIND_QUIETLY TRUE) +set (Vorbis_Enc_FIND_QUIETLY TRUE) +set (Vorbis_File_FIND_QUIETLY TRUE) + +find_package (Ogg QUIET) + +find_package (PkgConfig QUIET) +pkg_check_modules (PC_Vorbis_Vorbis QUIET vorbis) +pkg_check_modules (PC_Vorbis_Enc QUIET vorbisenc) +pkg_check_modules (PC_Vorbis_File QUIET vorbisfile) + +set (Vorbis_VERSION ${PC_Vorbis_Vorbis_VERSION}) + +find_path (Vorbis_Vorbis_INCLUDE_DIR vorbis/codec.h + HINTS + ${PC_Vorbis_Vorbis_INCLUDEDIR} + ${PC_Vorbis_Vorbis_INCLUDE_DIRS} + ${Vorbis_ROOT} + ) + +find_path (Vorbis_Enc_INCLUDE_DIR vorbis/vorbisenc.h + HINTS + ${PC_Vorbis_Enc_INCLUDEDIR} + ${PC_Vorbis_Enc_INCLUDE_DIRS} + ${Vorbis_ROOT} + ) + +find_path (Vorbis_File_INCLUDE_DIR vorbis/vorbisfile.h + HINTS + ${PC_Vorbis_File_INCLUDEDIR} + ${PC_Vorbis_File_INCLUDE_DIRS} + ${Vorbis_ROOT} + ) + +find_library (Vorbis_Vorbis_LIBRARY + NAMES + vorbis + vorbis_static + libvorbis + libvorbis_static + HINTS + ${PC_Vorbis_Vorbis_LIBDIR} + ${PC_Vorbis_Vorbis_LIBRARY_DIRS} + ${Vorbis_ROOT} + ) + +find_library (Vorbis_Enc_LIBRARY + NAMES + vorbisenc + vorbisenc_static + libvorbisenc + libvorbisenc_static + HINTS + ${PC_Vorbis_Enc_LIBDIR} + 
${PC_Vorbis_Enc_LIBRARY_DIRS} + ${Vorbis_ROOT} + ) + +find_library (Vorbis_File_LIBRARY + NAMES + vorbisfile + vorbisfile_static + libvorbisfile + libvorbisfile_static + HINTS + ${PC_Vorbis_File_LIBDIR} + ${PC_Vorbis_File_LIBRARY_DIRS} + ${Vorbis_ROOT} + ) + +include (FindPackageHandleStandardArgs) + +if (Vorbis_Vorbis_LIBRARY AND Vorbis_Vorbis_INCLUDE_DIR AND Ogg_FOUND) + set (Vorbis_Vorbis_FOUND TRUE) +endif () + +if (Vorbis_Enc_LIBRARY AND Vorbis_Enc_INCLUDE_DIR AND Vorbis_Vorbis_FOUND) + set (Vorbis_Enc_FOUND TRUE) +endif () + +if (Vorbis_Vorbis_FOUND AND Vorbis_File_LIBRARY AND Vorbis_File_INCLUDE_DIR) + set (Vorbis_File_FOUND TRUE) +endif () + +find_package_handle_standard_args (Vorbis + REQUIRED_VARS + Vorbis_Vorbis_LIBRARY + Vorbis_Vorbis_INCLUDE_DIR + Ogg_FOUND + HANDLE_COMPONENTS + VERSION_VAR Vorbis_VERSION) + + +if (Vorbis_Vorbis_FOUND) + set (Vorbis_Vorbis_INCLUDE_DIRS ${VORBIS_INCLUDE_DIR}) + set (Vorbis_Vorbis_LIBRARIES ${VORBIS_LIBRARY} ${OGG_LIBRARIES}) + if (NOT TARGET Vorbis::vorbis) + add_library (Vorbis::vorbis UNKNOWN IMPORTED) + set_target_properties (Vorbis::vorbis PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${Vorbis_Vorbis_INCLUDE_DIR}" + IMPORTED_LOCATION "${Vorbis_Vorbis_LIBRARY}" + INTERFACE_LINK_LIBRARIES Ogg::ogg + ) + endif () + + if (Vorbis_Enc_FOUND) + set (Vorbis_Enc_INCLUDE_DIRS ${Vorbis_Enc_INCLUDE_DIR}) + set (Vorbis_Enc_LIBRARIES ${Vorbis_Enc_LIBRARY} ${Vorbis_Enc_LIBRARIES}) + if (NOT TARGET Vorbis::vorbisenc) + add_library (Vorbis::vorbisenc UNKNOWN IMPORTED) + set_target_properties (Vorbis::vorbisenc PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${Vorbis_Enc_INCLUDE_DIR}" + IMPORTED_LOCATION "${Vorbis_Enc_LIBRARY}" + INTERFACE_LINK_LIBRARIES Vorbis::vorbis + ) + endif () + endif () + + if (Vorbis_File_FOUND) + set (Vorbis_File_INCLUDE_DIRS ${Vorbis_File_INCLUDE_DIR}) + set (Vorbis_File_LIBRARIES ${Vorbis_File_LIBRARY} ${Vorbis_File_LIBRARIES}) + if (NOT TARGET Vorbis::vorbisfile) + add_library (Vorbis::vorbisfile UNKNOWN 
IMPORTED) + set_target_properties (Vorbis::vorbisfile PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${Vorbis_File_INCLUDE_DIR}" + IMPORTED_LOCATION "${Vorbis_File_LIBRARY}" + INTERFACE_LINK_LIBRARIES Vorbis::vorbis + ) + endif () + endif () + +endif () + +mark_as_advanced (Vorbis_Vorbis_INCLUDE_DIR Vorbis_Vorbis_LIBRARY) +mark_as_advanced (Vorbis_Enc_INCLUDE_DIR Vorbis_Enc_LIBRARY) +mark_as_advanced (Vorbis_File_INCLUDE_DIR Vorbis_File_LIBRARY) \ No newline at end of file diff --git a/docs/BUILDING.md b/docs/BUILDING.md index d042a25c..aac581e6 100644 --- a/docs/BUILDING.md +++ b/docs/BUILDING.md @@ -83,34 +83,34 @@ C:\Program Files\CMake\bin\cmake.exe --build build-cmake --target clean #### Debian/Ubuntu ```sh # using gcc -apt-get install gcc g++ git cmake ninja-build lsb-release libsdl2-dev libpng-dev libsdl2-net-dev libzip-dev zipcmp zipmerge ziptool nlohmann-json3-dev libtinyxml2-dev libspdlog-dev libboost-dev libopengl-dev +apt-get install gcc g++ git cmake ninja-build lsb-release libsdl2-dev libpng-dev libsdl2-net-dev libzip-dev zipcmp zipmerge ziptool nlohmann-json3-dev libtinyxml2-dev libspdlog-dev libboost-dev libopengl-dev libogg-dev libvorbis-dev # or using clang -apt-get install clang git cmake ninja-build lsb-release libsdl2-dev libpng-dev libsdl2-net-dev libzip-dev zipcmp zipmerge ziptool nlohmann-json3-dev libtinyxml2-dev libspdlog-dev libboost-dev libopengl-dev +apt-get install clang git cmake ninja-build lsb-release libsdl2-dev libpng-dev libsdl2-net-dev libzip-dev zipcmp zipmerge ziptool nlohmann-json3-dev libtinyxml2-dev libspdlog-dev libboost-dev libopengl-dev libogg-dev libvorbis-dev ``` #### Arch ```sh # using gcc -pacman -S gcc git cmake ninja lsb-release sdl2 libpng libzip nlohmann-json tinyxml2 spdlog sdl2_net boost +pacman -S gcc git cmake ninja lsb-release sdl2 libpng libzip nlohmann-json tinyxml2 spdlog sdl2_net boost libogg libvorbis # or using clang -pacman -S clang git cmake ninja lsb-release sdl2 libpng libzip nlohmann-json tinyxml2 
spdlog sdl2_net boost +pacman -S clang git cmake ninja lsb-release sdl2 libpng libzip nlohmann-json tinyxml2 spdlog sdl2_net boost libogg libvorbis ``` #### Fedora ```sh # using gcc -dnf install gcc gcc-c++ git cmake ninja-build lsb_release SDL2-devel libpng-devel libzip-devel libzip-tools nlohmann-json-devel tinyxml2-devel spdlog-devel boost-devel +dnf install gcc gcc-c++ git cmake ninja-build lsb_release SDL2-devel libpng-devel libzip-devel libzip-tools nlohmann-json-devel tinyxml2-devel spdlog-devel boost-devel libogg-devel libvorbis-devel # or using clang -dnf install clang git cmake ninja-build lsb_release SDL2-devel libpng-devel libzip-devel libzip-tools nlohmann-json-devel tinyxml2-devel spdlog-devel boost-devel +dnf install clang git cmake ninja-build lsb_release SDL2-devel libpng-devel libzip-devel libzip-tools nlohmann-json-devel tinyxml2-devel spdlog-devel boost-devel libogg-devel libvorbis-devel ``` #### openSUSE ```sh # using gcc -zypper in gcc gcc-c++ git cmake ninja SDL2-devel libpng16-devel libzip-devel libzip-tools nlohmann_json-devel tinyxml2-devel spdlog-devel +zypper in gcc gcc-c++ git cmake ninja SDL2-devel libpng16-devel libzip-devel libzip-tools nlohmann_json-devel tinyxml2-devel spdlog-devel libogg-devel libvorbis-devel # or using clang -zypper in clang libstdc++-devel git cmake ninja SDL2-devel libpng16-devel libzip-devel libzip-tools nlohmann_json-devel tinyxml2-devel spdlog-devel +zypper in clang libstdc++-devel git cmake ninja SDL2-devel libpng16-devel libzip-devel libzip-tools nlohmann_json-devel tinyxml2-devel spdlog-devel libogg-devel libvorbis-devel ``` ### Build @@ -160,7 +160,7 @@ cmake --build build-cmake --target clean ``` ## macOS -Requires Xcode (or xcode-tools) && `sdl2, libpng, glew, ninja, cmake, nlohmann-json, libzip` (can be installed via homebrew, macports, etc) +Requires Xcode (or xcode-tools) && `sdl2, libpng, glew, ninja, cmake, nlohmann-json, libzip, vorbis-tools` (can be installed via homebrew, macports, etc) 
**Important: For maximum performance make sure you have ninja build tools installed!** diff --git a/src/audio/audio_synthesis.c b/src/audio/audio_synthesis.c index 090e5128..f360229a 100644 --- a/src/audio/audio_synthesis.c +++ b/src/audio/audio_synthesis.c @@ -1036,13 +1036,14 @@ Acmd* AudioSynth_ProcessNote(s32 noteIndex, NoteSubEu* noteSub, NoteSynthesisSta goto skip; case CODEC_S16: + flags = A_CONTINUE; skipBytes = 0; size_t bytesToRead; numSamplesProcessed += numSamplesToLoadAdj; dmemUncompressedAddrOffset1 = numSamplesToLoadAdj; if (((synthState->samplePosInt * 2) + (numSamplesToLoadAdj)*SAMPLE_SIZE) < bookSample->size) { - bytesToRead = (numSamplesToLoadAdj)*SAMPLE_SIZE; + bytesToRead = (numSamplesToLoadAdj) * SAMPLE_SIZE; } else { bytesToRead = bookSample->size - (synthState->samplePosInt * 2); } diff --git a/src/port/Engine.cpp b/src/port/Engine.cpp index 98492a67..aaebe67a 100644 --- a/src/port/Engine.cpp +++ b/src/port/Engine.cpp @@ -237,8 +237,9 @@ GameEngine::GameEngine() { loader->RegisterResourceFactory(std::make_shared(), RESOURCE_FORMAT_BINARY, "Sample", static_cast(SF64::ResourceType::Sample), 1); - loader->RegisterResourceFactory(std::make_shared(), RESOURCE_FORMAT_BINARY, - "Sample", static_cast(SF64::ResourceType::Sample), 2); + + loader->RegisterResourceFactory(std::make_shared(), RESOURCE_FORMAT_XML, + "Sample", static_cast(SF64::ResourceType::Sample), 0); loader->RegisterResourceFactory(std::make_shared(), RESOURCE_FORMAT_BINARY, "SoundFont", static_cast(SF64::ResourceType::SoundFont), 0); diff --git a/src/port/resource/importers/audio/SampleFactory.cpp b/src/port/resource/importers/audio/SampleFactory.cpp index cac380d2..8289266a 100644 --- a/src/port/resource/importers/audio/SampleFactory.cpp +++ b/src/port/resource/importers/audio/SampleFactory.cpp @@ -1,6 +1,14 @@ #include "SampleFactory.h" #include "../ResourceUtil.h" #include "port/resource/type/audio/Sample.h" +#include "sf64audio_provisional.h" +#define DR_WAV_IMPLEMENTATION 
+#include + +#define DR_MP3_IMPLEMENTATION +#include + +#include "vorbis/vorbisfile.h" namespace SF64 { std::shared_ptr ResourceFactoryBinarySampleV1::ReadResource(std::shared_ptr file) { @@ -23,7 +31,7 @@ std::shared_ptr ResourceFactoryBinarySampleV1::ReadResource(std if(sample->mSample.codec == 2){ sample->mSample.medium = 2; for(size_t i = 0; i < sample->mSample.size / 2; i++){ - int16_t* sampleData = (int16_t*) sample->mSample.sampleAddr; + auto sampleData = (int16_t*) sample->mSample.sampleAddr; sampleData[i] = BSWAP16(sampleData[i]); } } else { @@ -35,36 +43,230 @@ std::shared_ptr ResourceFactoryBinarySampleV1::ReadResource(std return sample; } -std::shared_ptr ResourceFactoryBinarySampleV2::ReadResource(std::shared_ptr file) { +static size_t VorbisReadCallback(void* out, size_t size, size_t elems, void* src) { + OggFileData* data = static_cast(src); + size_t toRead = size * elems; + + if (toRead > data->size - data->pos) { + toRead = data->size - data->pos; + } + + memcpy(out, static_cast(data->data) + data->pos, toRead); + data->pos += toRead; + + return toRead / size; +} + +static int VorbisSeekCallback(void* src, ogg_int64_t pos, int whence) { + OggFileData* data = static_cast(src); + size_t newPos; + + switch (whence) { + case SEEK_SET: + newPos = pos; + break; + case SEEK_CUR: + newPos = data->pos + pos; + break; + case SEEK_END: + newPos = data->size + pos; + break; + default: + return -1; + } + if (newPos > data->size) { + return -1; + } + data->pos = newPos; + return 0; +} + +static int VorbisCloseCallback([[maybe_unused]] void* src) { + return 0; +} + +static long VorbisTellCallback(void* src) { + OggFileData* data = static_cast(src); + return data->pos; +} + +static const ov_callbacks vorbisCallbacks = { + VorbisReadCallback, + VorbisSeekCallback, + VorbisCloseCallback, + VorbisTellCallback, +}; + +static void Mp3DecoderWorker(std::shared_ptr sample, std::shared_ptr sampleFile) { + drmp3 mp3; + drwav_uint64 numFrames; + drmp3_bool32 ret = + 
drmp3_init_memory(&mp3, sampleFile->Buffer.get()->data(), sampleFile->Buffer.get()->size(), nullptr); + numFrames = drmp3_get_pcm_frame_count(&mp3); + drwav_uint64 channels = mp3.channels; + drwav_uint64 sampleRate = mp3.sampleRate; + + sample->mSample.sampleAddr = new uint8_t[numFrames * channels * 2]; + drmp3_read_pcm_frames_s16(&mp3, numFrames, (int16_t*)sample->mSample.sampleAddr); +} + +static void OggDecoderWorker(std::shared_ptr sample, std::shared_ptr sampleFile) { + OggVorbis_File vf; + char dataBuff[4096]; + long read = 0; + size_t pos = 0; + + OggFileData fileData = { + .data = sampleFile->Buffer.get()->data(), + .pos = 0, + .size = sampleFile->Buffer.get()->size(), + }; + int ret = ov_open_callbacks(&fileData, &vf, nullptr, 0, vorbisCallbacks); + + vorbis_info* vi = ov_info(&vf, -1); + + uint64_t numFrames = ov_pcm_total(&vf, -1); + uint64_t sampleRate = vi->rate; + uint64_t numChannels = vi->channels; + int bitStream = 0; + size_t toRead = numFrames * numChannels * 2; + sample->mSample.sampleAddr = new uint8_t[toRead]; + do { + read = ov_read(&vf, dataBuff, 4096, 0, 2, 1, &bitStream); + memcpy(sample->mSample.sampleAddr + pos, dataBuff, read); + pos += read; + } while (read != 0); + ov_clear(&vf); +} + +std::shared_ptr ResourceFactoryXMLSampleV0::ReadResource(std::shared_ptr file) { if (!FileHasValidFormatAndReader(file)) { return nullptr; } auto sample = std::make_shared(file->InitData); - auto reader = std::get>(file->Reader); + auto child = std::get>(file->Reader)->FirstChildElement(); + std::shared_ptr initData = std::make_shared(); + const char* customFormatStr = child->Attribute("CustomFormat"); + memset(&sample->mSample, 0, sizeof(sample->mSample)); + sample->mSample.isRelocated = 0; + sample->mSample.codec = CodecStrToInt(child->Attribute("Codec"), file->InitData->Path.c_str()); + sample->mSample.medium = MediumStrToInt(child->Attribute("Medium")); + sample->mSample.unk = child->IntAttribute("bit26"); - sample->mSample.codec = 
reader->ReadUByte(); - sample->mSample.medium = reader->ReadUByte(); - sample->mSample.unk = reader->ReadUByte(); - sample->mSample.size = reader->ReadUInt32(); - sample->mSample.tuning = reader->ReadFloat(); - sample->mSample.loop = LoadChild(reader->ReadUInt64()); - sample->mSample.book = LoadChild(reader->ReadUInt64()); - sample->mSample.sampleAddr = new uint8_t[sample->mSample.size]; - reader->Read((char*) sample->mSample.sampleAddr, sample->mSample.size); - - if(sample->mSample.codec == 2){ - sample->mSample.medium = 2; - for(size_t i = 0; i < sample->mSample.size / 2; i++){ - int16_t* sampleData = (int16_t*) sample->mSample.sampleAddr; - sampleData[i] = BSWAP16(sampleData[i]); + tinyxml2::XMLElement* loopRoot = child->FirstChildElement("ADPCMLoop"); + if (loopRoot != nullptr) { + size_t i = 0; + sample->mSample.loop = new AdpcmLoopData(); + sample->mSample.loop->start = loopRoot->UnsignedAttribute("Start"); + sample->mSample.loop->end = loopRoot->UnsignedAttribute("End"); + sample->mSample.loop->count = loopRoot->UnsignedAttribute("Count"); + tinyxml2::XMLElement* predictor = loopRoot->FirstChildElement("Predictor"); + while (predictor != nullptr) { + sample->mSample.loop->predictorState[i++] = predictor->IntAttribute("State"); + predictor = predictor->NextSiblingElement(); } - } else { - sample->mSample.medium = 0; } - sample->mSample.isRelocated = 1; + tinyxml2::XMLElement* bookRoot = child->FirstChildElement("ADPCMBook"); + if (bookRoot != nullptr) { + size_t i = 0; + sample->mSample.book = new AdpcmBookData(); + sample->mSample.book->numPredictors = bookRoot->IntAttribute("Npredictors"); + sample->mSample.book->order = bookRoot->IntAttribute("Order"); + tinyxml2::XMLElement* book = bookRoot->FirstChildElement("Book"); + size_t numBooks = sample->mSample.book->numPredictors * sample->mSample.book->order * 8; + sample->mSample.book->book = new int16_t[numBooks]; + while (book != nullptr) { + sample->mSample.book->book[i++] = book->IntAttribute("Page"); + 
book = book->NextSiblingElement(); + } + } + + size_t size = child->Int64Attribute("Size"); + sample->mSample.size = size; + + const char* path = child->Attribute("Path"); + initData->Path = path; + initData->IsCustom = false; + initData->ByteOrder = Ship::Endianness::Native; + auto sampleFile = Ship::Context::GetInstance()->GetResourceManager()->GetArchiveManager()->LoadFile(path, initData); + if (customFormatStr != nullptr) { + // Compressed files can take a really long time to decode (~250ms per). + // This worked when we tested it (09/04/2024) (Works on my machine) + if (strcmp(customFormatStr, "wav") == 0) { + drwav wav; + drwav_uint64 numFrames; + + drwav_bool32 ret = + drwav_init_memory(&wav, sampleFile->Buffer.get()->data(), sampleFile->Buffer.get()->size(), nullptr); + + drwav_get_length_in_pcm_frames(&wav, &numFrames); + + sample->mSample.tuning = (wav.sampleRate * wav.channels) / 32000.0f; + sample->mSample.sampleAddr = new uint8_t[numFrames * wav.channels * 2]; + + drwav_read_pcm_frames_s16(&wav, numFrames, (int16_t*)sample->mSample.sampleAddr); + return sample; + } else if (strcmp(customFormatStr, "ogg") == 0) { + std::thread fileDecoderThread = std::thread(OggDecoderWorker, sample, sampleFile); + fileDecoderThread.detach(); + return sample; + } else if (strcmp(customFormatStr, "mp3") == 0) { + std::thread fileDecoderThread = std::thread(Mp3DecoderWorker, sample, sampleFile); + fileDecoderThread.detach(); + return sample; + } + } + // Not a normal streamed sample. Fallback to the original ADPCM sample to be decoded by the audio engine. + sample->mSample.sampleAddr = new uint8_t[size]; + // Can't use memcpy due to endianness issues. 
+ for (uint32_t i = 0; i < size; i++) { + sample->mSample.sampleAddr[i] = (*sampleFile->Buffer)[i]; + } return sample; } + +uint8_t ResourceFactoryXMLSampleV0::CodecStrToInt(const char* str, const char* file) { + if (strcmp("ADPCM", str) == 0) { + return CODEC_ADPCM; + } else if (strcmp("S8", str) == 0) { + return CODEC_S8; + } else if (strcmp("S16MEM", str) == 0) { + return CODEC_S16_INMEMORY; + } else if (strcmp("ADPCMSMALL", str) == 0) { + return CODEC_SMALL_ADPCM; + } else if (strcmp("REVERB", str) == 0) { + return CODEC_REVERB; + } else if (strcmp("S16", str) == 0) { + return CODEC_S16; + } else { + char buff[2048]; + snprintf(buff, 2048, + "Invalid codec in %s. Got %s, expected ADPCM, S8, S16MEM, ADPCMSMALL, REVERB, S16, UNK6, UNK7.", file, + str); + throw std::runtime_error(buff); + } +} + +uint32_t ResourceFactoryXMLSampleV0::MediumStrToInt(const char* str) { + if (!strcmp("Ram", str)) { + return 0; + } else if (!strcmp("Unk", str)) { + return 1; + } else if (!strcmp("Cart", str)) { + return 2; + } else if (!strcmp("Disk", str)) { + return 3; + // 4 is skipped + } else if (!strcmp("RamUnloaded", str)) { + return 5; + } else { + char buff[2048]; + snprintf(buff, 2048, + "Bad medium value. 
Got %s, expected Ram, Unk, Cart, or Disk.", str); + throw std::runtime_error(buff); + } +} } // namespace LUS diff --git a/src/port/resource/importers/audio/SampleFactory.h b/src/port/resource/importers/audio/SampleFactory.h index 4b2017eb..01fbf97e 100644 --- a/src/port/resource/importers/audio/SampleFactory.h +++ b/src/port/resource/importers/audio/SampleFactory.h @@ -1,16 +1,26 @@ #pragma once #include "Resource.h" +#include "ResourceFactoryXML.h" #include "ResourceFactoryBinary.h" namespace SF64 { +struct OggFileData { + void* data; + size_t pos; + size_t size; +}; + class ResourceFactoryBinarySampleV1 : public Ship::ResourceFactoryBinary { public: std::shared_ptr ReadResource(std::shared_ptr file) override; }; -class ResourceFactoryBinarySampleV2 : public Ship::ResourceFactoryBinary { +class ResourceFactoryXMLSampleV0 : public Ship::ResourceFactoryXML { public: std::shared_ptr ReadResource(std::shared_ptr file) override; + private: + static uint8_t CodecStrToInt(const char* str, const char* file); + static uint32_t MediumStrToInt(const char* str); }; }; // namespace LUS diff --git a/tools/Torch b/tools/Torch index 28dcd128..053d97a4 160000 --- a/tools/Torch +++ b/tools/Torch @@ -1 +1 @@ -Subproject commit 28dcd128b0406a43ab7ef9718213f7ab7d3736f8 +Subproject commit 053d97a433f3cfc9607b7cedb512d2e7ee1dc78a From 93199b4c02eca1364359e88fdd700386d6fc8bf0 Mon Sep 17 00:00:00 2001 From: KiritoDv Date: Mon, 3 Feb 2025 03:12:20 -0600 Subject: [PATCH 2/6] We did partial success while loading this --- src/audio/audio_synthesis.c | 2 +- src/port/Engine.cpp | 3 + .../importers/audio/SampleFactory.cpp | 14 +- .../importers/audio/SoundFontFactory.cpp | 212 ++++++++++++++++++ .../importers/audio/SoundFontFactory.h | 15 ++ tools/Torch | 2 +- 6 files changed, 241 insertions(+), 7 deletions(-) diff --git a/src/audio/audio_synthesis.c b/src/audio/audio_synthesis.c index f360229a..a9f16173 100644 --- a/src/audio/audio_synthesis.c +++ b/src/audio/audio_synthesis.c @@ -1042,7 
+1042,7 @@ Acmd* AudioSynth_ProcessNote(s32 noteIndex, NoteSubEu* noteSub, NoteSynthesisSta numSamplesProcessed += numSamplesToLoadAdj; dmemUncompressedAddrOffset1 = numSamplesToLoadAdj; - if (((synthState->samplePosInt * 2) + (numSamplesToLoadAdj)*SAMPLE_SIZE) < bookSample->size) { + if (((synthState->samplePosInt * 2) + (numSamplesToLoadAdj) * SAMPLE_SIZE) < bookSample->size) { bytesToRead = (numSamplesToLoadAdj) * SAMPLE_SIZE; } else { bytesToRead = bookSample->size - (synthState->samplePosInt * 2); diff --git a/src/port/Engine.cpp b/src/port/Engine.cpp index aaebe67a..dcedb08d 100644 --- a/src/port/Engine.cpp +++ b/src/port/Engine.cpp @@ -244,6 +244,9 @@ GameEngine::GameEngine() { loader->RegisterResourceFactory(std::make_shared(), RESOURCE_FORMAT_BINARY, "SoundFont", static_cast(SF64::ResourceType::SoundFont), 0); + loader->RegisterResourceFactory(std::make_shared(), RESOURCE_FORMAT_XML, + "SoundFont", static_cast(SF64::ResourceType::SoundFont), 0); + prevAltAssets = CVarGetInteger("gEnhancements.Mods.AlternateAssets", 0); gEnableGammaBoost = CVarGetInteger("gGraphics.GammaMode", 0) == 0; context->GetResourceManager()->SetAltAssetsEnabled(prevAltAssets); diff --git a/src/port/resource/importers/audio/SampleFactory.cpp b/src/port/resource/importers/audio/SampleFactory.cpp index 8289266a..994a9131 100644 --- a/src/port/resource/importers/audio/SampleFactory.cpp +++ b/src/port/resource/importers/audio/SampleFactory.cpp @@ -101,12 +101,14 @@ static void Mp3DecoderWorker(std::shared_ptr sample, std::shared_ptrBuffer.get()->data(), sampleFile->Buffer.get()->size(), nullptr); + drmp3_init_memory(&mp3, sampleFile->Buffer->data(), sampleFile->Buffer->size(), nullptr); numFrames = drmp3_get_pcm_frame_count(&mp3); drwav_uint64 channels = mp3.channels; drwav_uint64 sampleRate = mp3.sampleRate; - sample->mSample.sampleAddr = new uint8_t[numFrames * channels * 2]; + sample->mSample.tuning = (float)(sampleRate * channels) / 32000.0f; + sample->mSample.size = numFrames * 
channels * 2; + sample->mSample.sampleAddr = new uint8_t[sample->mSample.size]; drmp3_read_pcm_frames_s16(&mp3, numFrames, (int16_t*)sample->mSample.sampleAddr); } @@ -131,6 +133,7 @@ static void OggDecoderWorker(std::shared_ptr sample, std::shared_ptrmSample.sampleAddr = new uint8_t[toRead]; + sample->mSample.tuning = (float)(sampleRate * numChannels) / 32000.0f; do { read = ov_read(&vf, dataBuff, 4096, 0, 2, 1, &bitStream); memcpy(sample->mSample.sampleAddr + pos, dataBuff, read); @@ -199,12 +202,13 @@ std::shared_ptr ResourceFactoryXMLSampleV0::ReadResource(std::s drwav_uint64 numFrames; drwav_bool32 ret = - drwav_init_memory(&wav, sampleFile->Buffer.get()->data(), sampleFile->Buffer.get()->size(), nullptr); + drwav_init_memory(&wav, sampleFile->Buffer->data(), sampleFile->Buffer->size(), nullptr); drwav_get_length_in_pcm_frames(&wav, &numFrames); - sample->mSample.tuning = (wav.sampleRate * wav.channels) / 32000.0f; - sample->mSample.sampleAddr = new uint8_t[numFrames * wav.channels * 2]; + sample->mSample.tuning = (float)(wav.sampleRate * wav.channels) / 32000.0f; + sample->mSample.size = numFrames * wav.channels * 2; + sample->mSample.sampleAddr = new uint8_t[sample->mSample.size]; drwav_read_pcm_frames_s16(&wav, numFrames, (int16_t*)sample->mSample.sampleAddr); return sample; diff --git a/src/port/resource/importers/audio/SoundFontFactory.cpp b/src/port/resource/importers/audio/SoundFontFactory.cpp index 431f9410..1fda2854 100644 --- a/src/port/resource/importers/audio/SoundFontFactory.cpp +++ b/src/port/resource/importers/audio/SoundFontFactory.cpp @@ -1,5 +1,7 @@ #include "SoundFontFactory.h" #include "../ResourceUtil.h" +#include "utils/StringHelper.h" +#include #include "port/resource/type/audio/SoundFont.h" namespace SF64 { @@ -29,4 +31,214 @@ std::shared_ptr ResourceFactoryBinarySoundFontV0::ReadResource( return font; } + +int8_t ResourceFactoryXMLSoundFontV0::MediumStrToInt(const char* str) { + if (!strcmp("Ram", str)) { + return MEDIUM_RAM; + } else 
if (!strcmp("Unk", str)) { + return MEDIUM_UNK; + } else if (!strcmp("Cart", str)) { + return MEDIUM_CART; + } else if (!strcmp("Disk", str)) { + return MEDIUM_DISK_DRIVE; + // 4 is skipped + } else { + throw std::runtime_error( + StringHelper::Sprintf("Bad medium value. Got %s, expected Ram, Unk, Cart, or Disk.", str)); + } +} + +int8_t ResourceFactoryXMLSoundFontV0::CachePolicyToInt(const char* str) { + if (!strcmp("Temporary", str)) { + return CACHE_TEMPORARY; + } else if (!strcmp("Persistent", str)) { + return CACHE_PERSISTENT; + } else if (!strcmp("Either", str)) { + return CACHE_EITHER; + } else if (!strcmp("Permanent", str)) { + return CACHE_PERMANENT; + } else { + throw std::runtime_error(StringHelper::Sprintf( + "Bad cache policy value. Got %s, expected Temporary, Persistent, Either, or Permanent.", str)); + } +} + +void ResourceFactoryXMLSoundFontV0::ParseDrums(SoundFont* soundFont, tinyxml2::XMLElement* element) { + element = (tinyxml2::XMLElement*)element->FirstChildElement(); + // No drums + if (element == nullptr) { + soundFont->mFont.drums = nullptr; + soundFont->mFont.numDrums = 0; + return; + } + + do { + auto drum = new DrumData; + std::vector envelopes; + drum->adsrDecayIndex = element->IntAttribute("ReleaseRate"); + drum->pan = element->IntAttribute("Pan"); + drum->isRelocated = element->IntAttribute("Loaded"); + drum->tunedSample.tuning = element->FloatAttribute("Tuning"); + const char* sampleStr = element->Attribute("SampleRef"); + + if (sampleStr != nullptr && sampleStr[0] != 0) { + auto res = Ship::Context::GetInstance()->GetResourceManager()->LoadResourceProcess(sampleStr); + drum->tunedSample.sample = static_cast(res ? 
res->GetRawPointer() : nullptr); + } else { + drum->tunedSample.sample = nullptr; + } + + element = (tinyxml2::XMLElement*)element->FirstChildElement(); + if (!strcmp(element->Name(), "Envelopes")) { + // element = (tinyxml2::XMLElement*)element->FirstChildElement(); + unsigned int envCount = 0; + envelopes = ParseEnvelopes(soundFont, element, &envCount); + element = (tinyxml2::XMLElement*)element->Parent(); + drum->envelope = new EnvelopePointData[envelopes.size()]; + memcpy(drum->envelope, envelopes.data(), envelopes.size() * sizeof(EnvelopePointData)); + } else { + drum->envelope = nullptr; + } + + if (drum->tunedSample.sample == nullptr) { + soundFont->mDrums.push_back(nullptr); + } else { + soundFont->mDrums.push_back(drum); + } + + element = element->NextSiblingElement(); + } while (element != nullptr); + + soundFont->mFont.numDrums = soundFont->mDrums.size(); + soundFont->mFont.drums = soundFont->mDrums.data(); +} + +void ResourceFactoryXMLSoundFontV0::ParseInstruments(SoundFont* soundFont, tinyxml2::XMLElement* element) { + element = element->FirstChildElement(); + do { + auto instrument = new InstrumentData; + unsigned int envCount = 0; + std::vector envelopes; + + int isValid = element->BoolAttribute("IsValid"); + instrument->isRelocated = element->IntAttribute("Loaded"); + instrument->normalRangeLo = element->IntAttribute("NormalRangeLo"); + instrument->normalRangeHi = element->IntAttribute("NormalRangeHi"); + instrument->adsrDecayIndex = element->IntAttribute("ReleaseRate"); + tinyxml2::XMLElement* instrumentElement = element->FirstChildElement(); + tinyxml2::XMLElement* instrumentElementCopy = instrumentElement; + + if (instrumentElement != nullptr && !strcmp(instrumentElement->Name(), "Envelopes")) { + envelopes = ParseEnvelopes(soundFont, instrumentElement, &envCount); + instrument->envelope = new EnvelopePointData[envelopes.size()]; + memcpy(instrument->envelope, envelopes.data(), envelopes.size() * sizeof(EnvelopePointData)); + instrumentElement = 
instrumentElement->NextSiblingElement(); + } + + if (instrumentElement != nullptr && !strcmp("LowNotesSound", instrumentElement->Name())) { + instrument->lowPitchTunedSample.tuning = instrumentElement->FloatAttribute("Tuning"); + const char* sampleStr = instrumentElement->Attribute("SampleRef"); + if (sampleStr != nullptr && sampleStr[0] != 0) { + auto res = static_pointer_cast( + Ship::Context::GetInstance()->GetResourceManager()->LoadResourceProcess(sampleStr, true)); + auto sample = static_cast(res ? res->GetRawPointer() : nullptr); + instrument->lowPitchTunedSample.sample = sample; + if (sample != nullptr && sample->tuning != 0.0f) { + instrument->lowPitchTunedSample.tuning = sample->tuning; + } + } + instrumentElement = instrumentElement->NextSiblingElement(); + } + + if (instrumentElement != nullptr && !strcmp("NormalNotesSound", instrumentElement->Name())) { + instrument->normalPitchTunedSample.tuning = instrumentElement->FloatAttribute("Tuning"); + const char* sampleStr = instrumentElement->Attribute("SampleRef"); + if (sampleStr != nullptr && sampleStr[0] != 0) { + auto res = static_pointer_cast( + Ship::Context::GetInstance()->GetResourceManager()->LoadResourceProcess(sampleStr, true)); + auto sample = static_cast(res ? res->GetRawPointer() : nullptr); + instrument->normalPitchTunedSample.sample = sample; + if (sample != nullptr && sample->tuning != 0.0f) { + instrument->normalPitchTunedSample.tuning = sample->tuning; + } + } + instrumentElement = instrumentElement->NextSiblingElement(); + } + + if (instrumentElement != nullptr && !strcmp("HighNotesSound", instrumentElement->Name())) { + instrument->highPitchTunedSample.tuning = instrumentElement->FloatAttribute("Tuning"); + const char* sampleStr = instrumentElement->Attribute("SampleRef"); + if (sampleStr != nullptr && sampleStr[0] != 0) { + auto res = static_pointer_cast( + Ship::Context::GetInstance()->GetResourceManager()->LoadResourceProcess(sampleStr, true)); + auto sample = static_cast(res ? 
res->GetRawPointer() : nullptr); + instrument->highPitchTunedSample.sample = sample; + if (sample != nullptr && sample->tuning != 0.0f) { + instrument->highPitchTunedSample.tuning = sample->tuning; + } + } + instrumentElement = instrumentElement->NextSiblingElement(); + } + + soundFont->mInstruments.push_back(instrument); + + element = instrumentElementCopy; + element = (tinyxml2::XMLElement*)element->Parent(); + element = element->NextSiblingElement(); + } while (element != nullptr); + + soundFont->mFont.instruments = soundFont->mInstruments.data(); + soundFont->mFont.numInstruments = soundFont->mInstruments.size(); +} + +std::vector ResourceFactoryXMLSoundFontV0::ParseEnvelopes(SoundFont* soundFont, + tinyxml2::XMLElement* element, + unsigned int* count) { + std::vector envelopes; + unsigned int total = 0; + element = element->FirstChildElement("Envelope"); + while (element != nullptr) { + EnvelopePointData env = { + .delay = (s16)element->IntAttribute("Delay"), + .arg = (s16)element->IntAttribute("Arg"), + }; + envelopes.emplace_back(env); + element = element->NextSiblingElement("Envelope"); + total++; + } + *count = total; + return envelopes; +} + +std::shared_ptr ResourceFactoryXMLSoundFontV0::ReadResource(std::shared_ptr file) { + if (!FileHasValidFormatAndReader(file)) { + return nullptr; + } + auto audioSoundFont = std::make_shared(file->InitData); + auto child = std::get>(file->Reader)->FirstChildElement(); + // Header data + memset(&audioSoundFont->mFont, 0, sizeof(audioSoundFont->mFont)); + + auto shortData1 = child->IntAttribute("Data1"); + auto shortData2 = child->IntAttribute("Data2"); + + audioSoundFont->mFont.numInstruments = (shortData2 >> 8) & 0xFFu; + audioSoundFont->mFont.numDrums = shortData2 & 0xFFu; + audioSoundFont->mFont.sampleBankId1 = (shortData1 >> 8) & 0xFFu; + audioSoundFont->mFont.sampleBankId2 = shortData1 & 0xFFu; + + child = (tinyxml2::XMLElement*)child->FirstChildElement(); + + while (child != nullptr) { + const char* name = 
child->Name(); + + if (!strcmp(name, "Drums")) { + ParseDrums(audioSoundFont.get(), child); + } else if (!strcmp(name, "Instruments")) { + ParseInstruments(audioSoundFont.get(), child); + } + child = child->NextSiblingElement(); + } + return audioSoundFont; +} } // namespace LUS diff --git a/src/port/resource/importers/audio/SoundFontFactory.h b/src/port/resource/importers/audio/SoundFontFactory.h index 04823a55..b5afda9a 100644 --- a/src/port/resource/importers/audio/SoundFontFactory.h +++ b/src/port/resource/importers/audio/SoundFontFactory.h @@ -1,11 +1,26 @@ #pragma once #include "Resource.h" +#include "ResourceFactoryXML.h" #include "ResourceFactoryBinary.h" +#include "port/resource/type/audio/SoundFont.h" namespace SF64 { class ResourceFactoryBinarySoundFontV0 : public Ship::ResourceFactoryBinary { public: std::shared_ptr ReadResource(std::shared_ptr file) override; }; + +class ResourceFactoryXMLSoundFontV0 : public Ship::ResourceFactoryXML { + public: + std::shared_ptr ReadResource(std::shared_ptr file) override; + static int8_t MediumStrToInt(const char* str); + static int8_t CachePolicyToInt(const char* str); + + private: + void ParseDrums(SoundFont* soundFont, tinyxml2::XMLElement* element); + void ParseInstruments(SoundFont* soundFont, tinyxml2::XMLElement* element); + std::vector ParseEnvelopes(SoundFont* soundFont, tinyxml2::XMLElement* element, + unsigned int* count); +}; }; // namespace LUS diff --git a/tools/Torch b/tools/Torch index 053d97a4..27af7233 160000 --- a/tools/Torch +++ b/tools/Torch @@ -1 +1 @@ -Subproject commit 053d97a433f3cfc9607b7cedb512d2e7ee1dc78a +Subproject commit 27af72331ceba7703f0382bb0316320baed377a3 From 3020f3bd95676d2830a8310b92017880404958ad Mon Sep 17 00:00:00 2001 From: KiritoDv Date: Mon, 3 Feb 2025 11:48:30 -0600 Subject: [PATCH 3/6] Fixed implementation issues with CODEC_S16 --- src/audio/audio_synthesis.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/audio/audio_synthesis.c 
b/src/audio/audio_synthesis.c index a9f16173..93cb7b1b 100644 --- a/src/audio/audio_synthesis.c +++ b/src/audio/audio_synthesis.c @@ -1036,21 +1036,23 @@ Acmd* AudioSynth_ProcessNote(s32 noteIndex, NoteSubEu* noteSub, NoteSynthesisSta goto skip; case CODEC_S16: + aClearBuffer(cmd, DMEM_UNCOMPRESSED_NOTE, (numSamplesToLoadAdj + 16) * 2); flags = A_CONTINUE; skipBytes = 0; - size_t bytesToRead; numSamplesProcessed += numSamplesToLoadAdj; - dmemUncompressedAddrOffset1 = numSamplesToLoadAdj; + aligned = numSamplesToLoadAdj; + size_t bytesToRead; - if (((synthState->samplePosInt * 2) + (numSamplesToLoadAdj) * SAMPLE_SIZE) < bookSample->size) { - bytesToRead = (numSamplesToLoadAdj) * SAMPLE_SIZE; + if (((synthState->samplePosInt * 2) + (numSamplesToLoadAdj + 16) * 2) < + bookSample->size) { + bytesToRead = (numSamplesToLoadAdj + 16) * 2; } else { bytesToRead = bookSample->size - (synthState->samplePosInt * 2); } // @port [Custom audio] // TLDR samples are loaded async and might be null the first time they are played. 
// See note in AudioSampleFactory.cpp - if (sampleAddr != NULL) { + if ((void*) sampleAddr != NULL) { aLoadBuffer(cmd++, sampleAddr + (synthState->samplePosInt * 2), DMEM_UNCOMPRESSED_NOTE, bytesToRead); } From 717414a5c9dcec23829f56f0e0f997ca5c70f09b Mon Sep 17 00:00:00 2001 From: KiritoDv Date: Mon, 3 Feb 2025 14:57:28 -0600 Subject: [PATCH 4/6] Fixed isRelocated --- src/port/resource/importers/audio/SampleFactory.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/port/resource/importers/audio/SampleFactory.cpp b/src/port/resource/importers/audio/SampleFactory.cpp index 994a9131..890641a3 100644 --- a/src/port/resource/importers/audio/SampleFactory.cpp +++ b/src/port/resource/importers/audio/SampleFactory.cpp @@ -229,6 +229,8 @@ std::shared_ptr ResourceFactoryXMLSampleV0::ReadResource(std::s sample->mSample.sampleAddr[i] = (*sampleFile->Buffer)[i]; } + sample->mSample.isRelocated = 1; + return sample; } From 4734a5ea068c6ed23f106324364da0332f65f0f8 Mon Sep 17 00:00:00 2001 From: KiritoDv Date: Mon, 3 Feb 2025 17:18:21 -0600 Subject: [PATCH 5/6] Rewritten this thing --- src/audio/audio_synthesis.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/src/audio/audio_synthesis.c b/src/audio/audio_synthesis.c index 93cb7b1b..af3348c0 100644 --- a/src/audio/audio_synthesis.c +++ b/src/audio/audio_synthesis.c @@ -1036,26 +1036,12 @@ Acmd* AudioSynth_ProcessNote(s32 noteIndex, NoteSubEu* noteSub, NoteSynthesisSta goto skip; case CODEC_S16: - aClearBuffer(cmd, DMEM_UNCOMPRESSED_NOTE, (numSamplesToLoadAdj + 16) * 2); + aLoadBuffer(aList++, OS_K0_TO_PHYSICAL(bookSample->sampleAddr + synthState->samplePosInt * 2), DMEM_UNCOMPRESSED_NOTE, + (numSamplesToLoadAdj + SAMPLES_PER_FRAME) * 2); flags = A_CONTINUE; skipBytes = 0; - numSamplesProcessed += numSamplesToLoadAdj; - aligned = numSamplesToLoadAdj; - size_t bytesToRead; - - if (((synthState->samplePosInt * 2) + (numSamplesToLoadAdj + 16) * 2) < - bookSample->size) { - 
bytesToRead = (numSamplesToLoadAdj + 16) * 2; - } else { - bytesToRead = bookSample->size - (synthState->samplePosInt * 2); - } - // @port [Custom audio] - // TLDR samples are loaded async and might be null the first time they are played. - // See note in AudioSampleFactory.cpp - if ((void*) sampleAddr != NULL) { - aLoadBuffer(cmd++, sampleAddr + (synthState->samplePosInt * 2), DMEM_UNCOMPRESSED_NOTE, - bytesToRead); - } + numSamplesProcessed = numSamplesToLoadAdj; + dmemUncompressedAddrOffset1 = numSamplesToLoadAdj; goto skip; } From ddf9db7bb78fd276902f52487e0c04ce9c89772f Mon Sep 17 00:00:00 2001 From: coco875 <59367621+coco875@users.noreply.github.com> Date: Tue, 4 Feb 2025 23:15:21 +0100 Subject: [PATCH 6/6] Sound optimisation (#150) * add sse2neon and optimise aEnvMixerImpl * optimise aResampleImpl * optimise aMixImpl * optimise aADPCMdecImpl --- CMakeLists.txt | 6 + src/audio/mixer.c | 475 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 481 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index ffe5a58f..525123b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -189,6 +189,12 @@ if (MSVC) endif() endif() +#=================== SSE2NEON =================== +set(SSE2NEON_DIR ${CMAKE_BINARY_DIR}/_deps/sse2neon) +file(DOWNLOAD "https://raw.githubusercontent.com/DLTcollab/sse2neon/refs/heads/master/sse2neon.h" "${SSE2NEON_DIR}/sse2neon.h") + +include_directories(${SSE2NEON_DIR}) + FetchContent_Declare( dr_libs GIT_REPOSITORY https://github.com/mackron/dr_libs.git diff --git a/src/audio/mixer.c b/src/audio/mixer.c index d9da7769..73254973 100644 --- a/src/audio/mixer.c +++ b/src/audio/mixer.c @@ -3,12 +3,68 @@ #include #include +#include + #include "mixer.h" #ifndef __clang__ #pragma GCC optimize ("unroll-loops") #endif +#if defined(__SSE2__) || defined(__aarch64__) +#define SSE2_AVAILABLE +#else +#pragma message("Warning: SSE2 support is not available. 
Code will not compile") +#endif + +#if defined(__SSE2__) +#include +#elif defined(__aarch64__) +#include "sse2neon.h" +#endif + +#ifdef SSE2_AVAILABLE +typedef struct { + __m128i lo, hi; +} m256i; + +static m256i m256i_mul_epi16(__m128i a, __m128i b) { + m256i res; + res.lo = _mm_mullo_epi16(a, b); + res.hi = _mm_mulhi_epi16(a, b); + + m256i ret; + ret.lo = _mm_unpacklo_epi16(res.lo, res.hi); + ret.hi = _mm_unpackhi_epi16(res.lo, res.hi); + return ret; +} + +static m256i m256i_add_m256i_epi32(m256i a, m256i b) { + m256i res; + res.lo = _mm_add_epi32(a.lo, b.lo); + res.hi = _mm_add_epi32(a.hi, b.hi); + return res; +} + +static m256i m256i_add_m128i_epi32(m256i a, __m128i b) { + m256i res; + res.lo = _mm_add_epi32(a.lo, b); + res.hi = _mm_add_epi32(a.hi, b); + return res; +} + +static m256i m256i_srai(m256i a, int b) { + m256i res; + res.lo = _mm_srai_epi32(a.lo, b); + res.hi = _mm_srai_epi32(a.hi, b); + return res; +} + +static __m128i m256i_clamp_to_m128i(m256i a) { + return _mm_packs_epi32(a.lo, a.hi); +} +#endif + #define ROUND_UP_64(v) (((v) + 63) & ~63) #define ROUND_UP_32(v) (((v) + 31) & ~31) #define ROUND_UP_16(v) (((v) + 15) & ~15) @@ -218,6 +274,8 @@ void aSetLoopImpl(ADPCM_STATE *adpcm_loop_state) { rspa.adpcm_loop_state = adpcm_loop_state; } +#ifndef SSE2_AVAILABLE + void aADPCMdecImpl(uint8_t flags, ADPCM_STATE state) { uint8_t *in = BUF_U8(rspa.in); int16_t *out = BUF_S16(rspa.out); @@ -269,6 +327,133 @@ void aADPCMdecImpl(uint8_t flags, ADPCM_STATE state) { memcpy(state, out - 16, 16 * sizeof(int16_t)); } +#else + +static uint16_t lower_4bit[] = { + 0xf, + 0xf, + 0xf, + 0xf, +}; + +static uint16_t lower_2bit[] = { /* 4 lanes (8 bytes): read below with the 64-bit _mm_loadl_epi64 */ + 0x3, 0x3, + 0x3, 0x3, +}; + +void aADPCMdecImpl(uint8_t flags, ADPCM_STATE state) { + uint8_t* in = BUF_U8(rspa.in); + int16_t* out = BUF_S16(rspa.out); + int nbytes = ROUND_UP_32(rspa.nbytes); + if (flags & A_INIT) { + memset(out, 0, 16 * sizeof(int16_t)); + } else if (flags & A_LOOP) { + memcpy(out, rspa.adpcm_loop_state, 16 * sizeof(int16_t)); +
} else { + memcpy(out, state, 16 * sizeof(int16_t)); + } + out += 16; + + __m128i mask_4bit = _mm_loadl_epi64((__m128i*) lower_4bit); + __m128i mask_2bit = _mm_loadl_epi64((__m128i*) lower_2bit); + + while (nbytes > 0) { + int shift = *in >> 4; // should be in 0..12 or 0..14 + __m128i shift_vec = _mm_set1_epi16(shift); + int table_index = *in++ & 0xf; // should be in 0..7 + int16_t(*tbl)[8] = rspa.adpcm_table[table_index]; + + for (int i = 0; i < 2; i++) { + int16_t ins[8]; + int16_t prev1 = out[-1]; + int16_t prev2 = out[-2]; + __m128i prev1_vec = _mm_set1_epi16(prev1); + __m128i prev2_vec = _mm_set1_epi16(prev2); + + __m128i ins_vec; + if (flags & 4) { + ins_vec = _mm_loadu_si16((__m128i*) in); + ins_vec = _mm_unpacklo_epi8(ins_vec, _mm_setzero_si128()); + __m128i in_vec_up2bit = _mm_srli_epi16(ins_vec, 6); + __m128i in_vec_uplower2bit = _mm_and_si128(_mm_srli_epi16(ins_vec, 4), mask_2bit); + __m128i in_vec_lowerup2bit = _mm_and_si128(_mm_srli_epi16(ins_vec, 2), mask_2bit); + __m128i in_vec_lower2bit = _mm_and_si128(ins_vec, mask_2bit); + __m128i in_vec_up = _mm_unpacklo_epi16(in_vec_up2bit, in_vec_uplower2bit); + in_vec_up = _mm_shuffle_epi32(in_vec_up, _MM_SHUFFLE(3, 1, 2, 0)); + __m128i in_vec_low = _mm_unpacklo_epi16(in_vec_lower2bit, in_vec_lowerup2bit); + in_vec_low = _mm_shuffle_epi32(in_vec_low, _MM_SHUFFLE(3, 1, 2, 0)); + ins_vec = _mm_unpacklo_epi32(in_vec_up, in_vec_low); + ins_vec = _mm_slli_epi16(ins_vec, 14); + ins_vec = _mm_srai_epi16(ins_vec, 14); + ins_vec = _mm_slli_epi16(ins_vec, shift); + + in += 2; + } else { + ins_vec = _mm_loadu_si32((__m128i*) in); + ins_vec = _mm_unpacklo_epi8(ins_vec, _mm_setzero_si128()); + __m128i in_vec_up4bit = _mm_srli_epi16(ins_vec, 4); + __m128i in_vec_lower4bit = _mm_and_si128(ins_vec, mask_4bit); + ins_vec = _mm_unpacklo_epi16(in_vec_up4bit, in_vec_lower4bit); + ins_vec = _mm_slli_epi16(ins_vec, 12); + ins_vec = _mm_srai_epi16(ins_vec, 12); + ins_vec = _mm_slli_epi16(ins_vec, shift); + + in += 4; + } + 
_mm_storeu_si128((__m128i*) ins, ins_vec); + + for (int j = 0; j < 2; j++) { + __m128i tbl0_vec = _mm_loadu_si64((__m128i*) (tbl[0] + (j * 4))); + __m128i tbl1_vec = _mm_loadu_si64((__m128i*) (tbl[1] + (j * 4))); + + m256i res; + res.lo = _mm_mullo_epi16(tbl0_vec, prev2_vec); + res.hi = _mm_mulhi_epi16(tbl0_vec, prev2_vec); + + tbl0_vec = _mm_unpacklo_epi16(res.lo, res.hi); + + res.lo = _mm_mullo_epi16(tbl1_vec, prev1_vec); + res.hi = _mm_mulhi_epi16(tbl1_vec, prev1_vec); + + tbl1_vec = _mm_unpacklo_epi16(res.lo, res.hi); + __m128i acc_vec = _mm_add_epi32(tbl0_vec, tbl1_vec); + + __m128i shift_ins = _mm_srai_epi32(j ? _mm_unpackhi_epi16(_mm_setzero_si128(), ins_vec) + : _mm_unpacklo_epi16(_mm_setzero_si128(), ins_vec), + 5); + acc_vec = _mm_add_epi32(acc_vec, shift_ins); + + tbl1_vec = _mm_loadu_si128((__m128i*) tbl[1]); + if (j == 0) { + tbl1_vec = _mm_slli_si128(tbl1_vec, (1 - 0) * 8 + 2); + } else { + tbl1_vec = _mm_slli_si128(tbl1_vec, (1 - 1) * 8 + 2); + } + for (int k = 0; k < ((j + 1) * 4); k++) { + __m128i ins_vec2 = _mm_set1_epi16(ins[k]); + res.lo = _mm_mullo_epi16(tbl1_vec, ins_vec2); + res.hi = _mm_mulhi_epi16(tbl1_vec, ins_vec2); + + __m128i mult = _mm_unpackhi_epi16(res.lo, res.hi); + acc_vec = _mm_add_epi32(acc_vec, mult); + tbl1_vec = _mm_slli_si128(tbl1_vec, 2); + } + + acc_vec = _mm_srai_epi32(acc_vec, 11); + acc_vec = _mm_packs_epi32(acc_vec, _mm_setzero_si128()); + _mm_storeu_si64((__m128*) out, acc_vec); + out += 4; + } + } + nbytes -= 16 * sizeof(int16_t); + } + memcpy(state, out - 16, 16 * sizeof(int16_t)); +} + +#endif + +#ifndef SSE2_AVAILABLE + void aResampleImpl(uint8_t flags, uint16_t pitch, RESAMPLE_STATE state) { int16_t tmp[16]; int16_t *in_initial = BUF_S16(rspa.in); @@ -320,6 +505,171 @@ void aResampleImpl(uint8_t flags, uint16_t pitch, RESAMPLE_STATE state) { memcpy(state + 8, in, 8 * sizeof(int16_t)); } +#else + +static const ALIGN_ASSET(16) int32_t x4000[4] = { + 0x4000, + 0x4000, + 0x4000, + 0x4000, +}; + +static void 
mm128_transpose(__m128i* r0, __m128i* r1, __m128i* r2, __m128i* r3) { + __m128 tmp0, tmp1, tmp2, tmp3; + __m128 row0, row1, row2, row3; + + row0 = _mm_castsi128_ps(*r0); + row1 = _mm_castsi128_ps(*r1); + row2 = _mm_castsi128_ps(*r2); + row3 = _mm_castsi128_ps(*r3); + + tmp0 = _mm_shuffle_ps(row0, row1, _MM_SHUFFLE(2, 0, 2, 0)); // 0 2 4 6 + tmp1 = _mm_shuffle_ps(row0, row1, _MM_SHUFFLE(3, 1, 3, 1)); // 1 3 5 7 + tmp2 = _mm_shuffle_ps(row2, row3, _MM_SHUFFLE(2, 0, 2, 0)); // 8 a c e + tmp3 = _mm_shuffle_ps(row2, row3, _MM_SHUFFLE(3, 1, 3, 1)); // 9 b d f + + row0 = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(2, 0, 2, 0)); // 0 4 8 c + row1 = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(2, 0, 2, 0)); // 1 5 9 d + row2 = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(3, 1, 3, 1)); // 2 6 a e + row3 = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(3, 1, 3, 1)); // 3 7 b f + + *r0 = _mm_castps_si128(row0); + *r1 = _mm_castps_si128(row1); + *r2 = _mm_castps_si128(row2); + *r3 = _mm_castps_si128(row3); +} + +static __m128i move_two_4x16(int16_t* a, int16_t* b) { + return _mm_set_epi64(_mm_movepi64_pi64(_mm_loadl_epi64((__m128i*) a)), + _mm_movepi64_pi64(_mm_loadl_epi64((__m128i*) b))); +} + +void aResampleImpl(uint8_t flags, uint16_t pitch, RESAMPLE_STATE state) { + int16_t tmp[32]; + int16_t* in_initial = BUF_S16(rspa.in); + int16_t* in = in_initial; + int16_t* out = BUF_S16(rspa.out); + int nbytes = ROUND_UP_16(rspa.nbytes); + uint32_t pitch_accumulator; + int i; + + if (flags & A_INIT) { + memset(tmp, 0, 5 * sizeof(int16_t)); + } else { + memcpy(tmp, state, 16 * sizeof(int16_t)); + } + if (flags & 2) { + memcpy(in - 8, tmp + 8, 8 * sizeof(int16_t)); + in -= tmp[5] / sizeof(int16_t); + } + in -= 4; + pitch_accumulator = (uint16_t) tmp[4]; + memcpy(in, tmp, 4 * sizeof(int16_t)); + + __m128i x4000Vec = _mm_load_si128((__m128i*) x4000); + + do { + for (i = 0; i < 2; i++) { + int16_t* tbl0 = resample_table[pitch_accumulator * 64 >> 16]; + + int16_t* in0 = in; + + pitch_accumulator += (pitch << 1); + 
in += pitch_accumulator >> 16; + pitch_accumulator %= 0x10000; + + int16_t* tbl1 = resample_table[pitch_accumulator * 64 >> 16]; + + int16_t* in1 = in; + + pitch_accumulator += (pitch << 1); + in += pitch_accumulator >> 16; + pitch_accumulator %= 0x10000; + + int16_t* tbl2 = resample_table[pitch_accumulator * 64 >> 16]; + + int16_t* in2 = in; + + pitch_accumulator += (pitch << 1); + in += pitch_accumulator >> 16; + pitch_accumulator %= 0x10000; + + int16_t* tbl3 = resample_table[pitch_accumulator * 64 >> 16]; + + int16_t* in3 = in; + + pitch_accumulator += (pitch << 1); + in += pitch_accumulator >> 16; + pitch_accumulator %= 0x10000; + + __m128i vec_in0 = move_two_4x16(in1, in0); + + __m128i vec_tbl0 = move_two_4x16(tbl1, tbl0); + + __m128i vec_in1 = move_two_4x16(in3, in2); + + __m128i vec_tbl1 = move_two_4x16(tbl3, tbl2); + + // we multiply in by tbl + + m256i res; + res.lo = _mm_mullo_epi16(vec_in0, vec_tbl0); + res.hi = _mm_mulhi_epi16(vec_in0, vec_tbl0); + + __m128i out0_vec = _mm_unpacklo_epi16(res.lo, res.hi); + __m128i out1_vec = _mm_unpackhi_epi16(res.lo, res.hi); + + res.lo = _mm_mullo_epi16(vec_in1, vec_tbl1); + res.hi = _mm_mulhi_epi16(vec_in1, vec_tbl1); + + __m128i out2_vec = _mm_unpacklo_epi16(res.lo, res.hi); + __m128i out3_vec = _mm_unpackhi_epi16(res.lo, res.hi); + + // transpose to more easily make a sum at the end + + mm128_transpose(&out0_vec, &out1_vec, &out2_vec, &out3_vec); + + // add 0x4000 + + out0_vec = _mm_add_epi32(out0_vec, x4000Vec); + out1_vec = _mm_add_epi32(out1_vec, x4000Vec); + out2_vec = _mm_add_epi32(out2_vec, x4000Vec); + out3_vec = _mm_add_epi32(out3_vec, x4000Vec); + + // shift by 15 + + out0_vec = _mm_srai_epi32(out0_vec, 15); + out1_vec = _mm_srai_epi32(out1_vec, 15); + out2_vec = _mm_srai_epi32(out2_vec, 15); + out3_vec = _mm_srai_epi32(out3_vec, 15); + + // sum all to make sample + __m128i sample_vec = _mm_add_epi32(_mm_add_epi32(_mm_add_epi32(out0_vec, out1_vec), out2_vec), out3_vec); + + // at the end we do this below 
but four times + // sample = ((in[0] * tbl[0] + 0x4000) >> 15) + ((in[1] * tbl[1] + 0x4000) >> 15) + + // ((in[2] * tbl[2] + 0x4000) >> 15) + ((in[3] * tbl[3] + 0x4000) >> 15); + sample_vec = _mm_packs_epi32(sample_vec, _mm_setzero_si128()); + _mm_storeu_si64(out, sample_vec); + + out += 4; + } + nbytes -= 8 * sizeof(int16_t); + } while (nbytes > 0); + + state[4] = (int16_t) pitch_accumulator; + memcpy(state, in, 4 * sizeof(int16_t)); + i = (in - in_initial + 4) & 7; + in -= i; + if (i != 0) { + i = -8 - i; + } + state[5] = i; + memcpy(state + 8, in, 8 * sizeof(int16_t)); +} + +#endif + void aEnvSetup1Impl(uint8_t initial_vol_wet, uint16_t rate_wet, uint16_t rate_left, uint16_t rate_right) { rspa.vol_wet = (uint16_t)(initial_vol_wet << 8); rspa.rate_wet = rate_wet; @@ -332,6 +682,8 @@ void aEnvSetup2Impl(uint16_t initial_vol_left, uint16_t initial_vol_right) { rspa.vol[1] = initial_vol_right; } +#ifndef SSE2_AVAILABLE + void aEnvMixerImpl(uint16_t in_addr, uint16_t n_samples, bool swap_reverb, bool neg_3, bool neg_2, bool neg_left, bool neg_right, @@ -368,6 +720,64 @@ void aEnvMixerImpl(uint16_t in_addr, uint16_t n_samples, bool swap_reverb, } while (n > 0); } +#else +// SSE2 optimized version of algorithm +void aEnvMixerImpl(uint16_t in_addr, uint16_t n_samples, bool swap_reverb, + bool neg_3, bool neg_2, + bool neg_left, bool neg_right, + int32_t wet_dry_addr, u32 unk) +{ + int16_t *in = BUF_S16(in_addr); + int16_t *dry[2] = {BUF_S16(((wet_dry_addr >> 24) & 0xFF) << 4), BUF_S16(((wet_dry_addr >> 16) & 0xFF) << 4)}; + int16_t *wet[2] = {BUF_S16(((wet_dry_addr >> 8) & 0xFF) << 4), BUF_S16(((wet_dry_addr) & 0xFF) << 4)}; + int16_t negs[4] = {neg_left ? -1 : 0, neg_right ? -1 : 0, neg_3 ? -4 : 0, neg_2 ?
-2 : 0}; + int n = ROUND_UP_16(n_samples); + const int n_aligned = n - (n % 8); + + uint16_t vols[2] = {rspa.vol[0], rspa.vol[1]}; + uint16_t rates[2] = {rspa.rate[0], rspa.rate[1]}; + uint16_t vol_wet = rspa.vol_wet; + uint16_t rate_wet = rspa.rate_wet; + + const __m128i* in_ptr = (__m128i*)in; + const __m128i* d_ptr[2] = { (__m128i*) dry[0], (__m128i*) dry[1] }; + const __m128i* w_ptr[2] = { (__m128i*) wet[0], (__m128i*) wet[1] }; + + // Aligned loop + for (int N = 0; N < n_aligned; N+=8) { + + // Init vectors + const __m128i in_channels = _mm_loadu_si128(in_ptr++); + __m128i d[2] = { _mm_loadu_si128(d_ptr[0]), _mm_loadu_si128(d_ptr[1]) }; + __m128i w[2] = { _mm_loadu_si128(w_ptr[0]), _mm_loadu_si128(w_ptr[1]) }; + + // Compute base samples + // sample = ((in * vols) >> 16) ^ negs + __m128i s[2] = { + _mm_xor_si128(_mm_mulhi_epi16(in_channels, _mm_set1_epi16(vols[0])), _mm_set1_epi16(negs[0])), + _mm_xor_si128(_mm_mulhi_epi16(in_channels, _mm_set1_epi16(vols[1])), _mm_set1_epi16(negs[1])) + }; + + // Compute the wet samples, swapping the two channels when swap_reverb is set + // ((sample * vol_wet) >> 16) ^ negs + __m128i ss[2] = { + _mm_xor_si128(_mm_mulhi_epi16(s[swap_reverb], _mm_set1_epi16(vol_wet)), _mm_set1_epi16(negs[2])), + _mm_xor_si128(_mm_mulhi_epi16(s[!swap_reverb], _mm_set1_epi16(vol_wet)), _mm_set1_epi16(negs[3])) + }; + + // Store values to buffers + for (int j = 0; j < 2; j++) { + _mm_storeu_si128((__m128i*) d_ptr[j]++, _mm_adds_epi16(s[j], d[j])); + _mm_storeu_si128((__m128i*) w_ptr[j]++, _mm_adds_epi16(ss[j], w[j])); + vols[j] += rates[j]; + } + vol_wet += rate_wet; + } +} +#endif + +#ifndef SSE2_AVAILABLE + void aMixImpl(uint16_t count, int16_t gain, uint16_t in_addr, uint16_t out_addr) { int nbytes = ROUND_UP_32(ROUND_DOWN_16(count << 4)); int16_t *in = BUF_S16(in_addr); @@ -395,6 +805,71 @@ void aMixImpl(uint16_t count, int16_t gain, uint16_t in_addr, uint16_t out_addr) } } +#else + +static const ALIGN_ASSET(16) int16_t x7fff[8] = { + 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
0x7FFF, 0x7FFF, +}; + +void aMixImpl(uint16_t count, int16_t gain, uint16_t in_addr, uint16_t out_addr) { + int nbytes = ROUND_UP_32(ROUND_DOWN_16(count << 4)); + int16_t* in = BUF_S16(in_addr); + int16_t* out = BUF_S16(out_addr); + int i; + int32_t sample; + + if (gain == -0x8000) { + while (nbytes > 0) { + for (unsigned int i = 0; i < 2; i++) { + __m128i outVec = _mm_loadu_si128((__m128i*) out); + __m128i inVec = _mm_loadu_si128((__m128i*) in); + __m128i subsVec = _mm_subs_epi16(outVec, inVec); + _mm_storeu_si128((__m128i*) out, subsVec); + nbytes -= 8 * sizeof(int16_t); + in += 8; + out += 8; + } + } + } + + __m128i x7fffVec = _mm_load_si128((__m128i*) x7fff); + __m128i x4000Vec = _mm_load_si128((__m128i*) x4000); + __m128i gainVec = _mm_set1_epi16(gain); + + while (nbytes > 0) { + for (i = 0; i < 2; i++) { + // Load input and output data into vectors + __m128i outVec = _mm_loadu_si128((__m128i*) out); + __m128i inVec = _mm_loadu_si128((__m128i*) in); + // Multiply `out` by `0x7FFF` producing 32 bit results, and store the upper and lower bits in each vector. + // Equivalent to `out[0..8] * 0x7FFF` + m256i outx7fff = m256i_mul_epi16(outVec, x7fffVec); + // Same as above but for in and gain. Equivalent to `in[0..8] * gain` + m256i inxGain = m256i_mul_epi16(inVec, gainVec); + in += 8; + + // Each product is now a pair of vectors holding four 32 bit elements each. Continue the calculation per the reference implementation. + // We already did out * 0x7fff and in * gain. + // *out * 0x7fff + *in++ * gain is the final result of these two calculations.
+ m256i addVec = m256i_add_m256i_epi32(outx7fff, inxGain); + // Add 0x4000 + addVec = m256i_add_m128i_epi32(addVec, x4000Vec); + // Shift over by 15 + m256i shiftedVec = m256i_srai(addVec, 15); + // Convert each 32 bit element to 16 bit with saturation (clamp) and store in `outVec` + outVec = m256i_clamp_to_m128i(shiftedVec); + // Write the final vector back to memory + // The final calculation is ((out[0..8] * 0x7fff + in[0..8] * gain) + 0x4000) >> 15; + _mm_storeu_si128((__m128i*) out, outVec); + out += 8; + } + + nbytes -= 16 * sizeof(int16_t); + } +} + +#endif + void aS8DecImpl(uint8_t flags, ADPCM_STATE state) { uint8_t *in = BUF_U8(rspa.in); int16_t *out = BUF_S16(rspa.out);