axmol/3rdparty/openal/core/converter.cpp


#include "config.h"

#include "converter.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <limits.h>

#include "albit.h"
#include "alnumeric.h"
#include "fpu_ctrl.h"


namespace {

/* Upper bound on the resampling ratio. SampleConverter::Create caps the
 * fixed-point step at MaxPitch*MixerFracOne.
 */
constexpr uint MaxPitch{10};

/* A buffer line must be longer than the largest number of source samples a
 * single output sample can advance by.
 */
static_assert((BufferLineSize-1)/MaxPitch > 0, "MaxPitch is too large for BufferLineSize!");
/* NOTE(review): this looks like an overflow guard for the fixed-point
 * position math (DstSize*increment with DstSize <= BufferLineSize), assuming
 * MixerFracOne == 1<<MixerFracBits -- confirm against the mixer defs.
 */
static_assert((INT_MAX>>MixerFracBits)/MaxPitch > BufferLineSize,
    "MaxPitch and/or BufferLineSize are too large for MixerFracBits!");

/* Base template left undefined. Should be marked =delete, but Clang 3.8.1
 * chokes on that given the inline specializations.
 */
template<DevFmtType T>
inline float LoadSample(DevFmtType_t<T> val) noexcept;

template<> inline float LoadSample<DevFmtByte>(DevFmtType_t<DevFmtByte> val) noexcept
{ return val * (1.0f/128.0f); }
template<> inline float LoadSample<DevFmtShort>(DevFmtType_t<DevFmtShort> val) noexcept
{ return val * (1.0f/32768.0f); }
template<> inline float LoadSample<DevFmtInt>(DevFmtType_t<DevFmtInt> val) noexcept
{ return static_cast<float>(val) * (1.0f/2147483648.0f); }
template<> inline float LoadSample<DevFmtFloat>(DevFmtType_t<DevFmtFloat> val) noexcept
{ return val; }

template<> inline float LoadSample<DevFmtUByte>(DevFmtType_t<DevFmtUByte> val) noexcept
{ return LoadSample<DevFmtByte>(static_cast<int8_t>(val - 128)); }
template<> inline float LoadSample<DevFmtUShort>(DevFmtType_t<DevFmtUShort> val) noexcept
{ return LoadSample<DevFmtShort>(static_cast<int16_t>(val - 32768)); }
template<> inline float LoadSample<DevFmtUInt>(DevFmtType_t<DevFmtUInt> val) noexcept
{ return LoadSample<DevFmtInt>(static_cast<int32_t>(val - 2147483648u)); }


template<DevFmtType T>
inline void LoadSampleArray(float *RESTRICT dst, const void *src, const size_t srcstep,
    const size_t samples) noexcept
{
    const DevFmtType_t<T> *ssrc = static_cast<const DevFmtType_t<T>*>(src);
    for(size_t i{0u};i < samples;i++)
        dst[i] = LoadSample<T>(ssrc[i*srcstep]);
}

/* Runtime dispatch onto LoadSampleArray<T> for the given source sample type.
 * There is deliberately no default case, matching the set of formats handled
 * here one-to-one.
 */
void LoadSamples(float *dst, const void *src, const size_t srcstep, const DevFmtType srctype,
    const size_t samples) noexcept
{
    switch(srctype)
    {
    case DevFmtByte:
        LoadSampleArray<DevFmtByte>(dst, src, srcstep, samples);
        break;
    case DevFmtUByte:
        LoadSampleArray<DevFmtUByte>(dst, src, srcstep, samples);
        break;
    case DevFmtShort:
        LoadSampleArray<DevFmtShort>(dst, src, srcstep, samples);
        break;
    case DevFmtUShort:
        LoadSampleArray<DevFmtUShort>(dst, src, srcstep, samples);
        break;
    case DevFmtInt:
        LoadSampleArray<DevFmtInt>(dst, src, srcstep, samples);
        break;
    case DevFmtUInt:
        LoadSampleArray<DevFmtUInt>(dst, src, srcstep, samples);
        break;
    case DevFmtFloat:
        LoadSampleArray<DevFmtFloat>(dst, src, srcstep, samples);
        break;
    }
}


/* Converts a float sample to device format T. Only the explicit
 * specializations below are defined.
 */
template<DevFmtType T>
inline DevFmtType_t<T> StoreSample(float) noexcept;

/* Float output passes through unchanged. */
template<>
inline float StoreSample<DevFmtFloat>(float val) noexcept
{
    return val;
}

/* Signed integer outputs: scale up, clamp to the representable range, then
 * convert to an integer.
 */
template<>
inline int32_t StoreSample<DevFmtInt>(float val) noexcept
{
    const float scaled{val*2147483648.0f};
    return fastf2i(clampf(scaled, -2147483648.0f, 2147483520.0f));
}

template<>
inline int16_t StoreSample<DevFmtShort>(float val) noexcept
{
    const float scaled{val*32768.0f};
    return static_cast<int16_t>(fastf2i(clampf(scaled, -32768.0f, 32767.0f)));
}

template<>
inline int8_t StoreSample<DevFmtByte>(float val) noexcept
{
    const float scaled{val*128.0f};
    return static_cast<int8_t>(fastf2i(clampf(scaled, -128.0f, 127.0f)));
}

/* Unsigned outputs reuse the signed conversion and shift the result up by
 * half the type's range.
 */
template<>
inline uint32_t StoreSample<DevFmtUInt>(float val) noexcept
{
    const auto bits = static_cast<uint32_t>(StoreSample<DevFmtInt>(val));
    return bits + 2147483648u;
}

template<>
inline uint16_t StoreSample<DevFmtUShort>(float val) noexcept
{
    const int shifted{StoreSample<DevFmtShort>(val) + 32768};
    return static_cast<uint16_t>(shifted);
}

template<>
inline uint8_t StoreSample<DevFmtUByte>(float val) noexcept
{
    const int shifted{StoreSample<DevFmtByte>(val) + 128};
    return static_cast<uint8_t>(shifted);
}

template<DevFmtType T>
inline void StoreSampleArray(void *dst, const float *RESTRICT src, const size_t dststep,
    const size_t samples) noexcept
{
    DevFmtType_t<T> *sdst = static_cast<DevFmtType_t<T>*>(dst);
    for(size_t i{0u};i < samples;i++)
        sdst[i*dststep] = StoreSample<T>(src[i]);
}


/* Runtime dispatch onto StoreSampleArray<T> for the given destination sample
 * type. There is deliberately no default case, matching the set of formats
 * handled here one-to-one.
 */
void StoreSamples(void *dst, const float *src, const size_t dststep, const DevFmtType dsttype,
    const size_t samples) noexcept
{
    switch(dsttype)
    {
    case DevFmtByte:
        StoreSampleArray<DevFmtByte>(dst, src, dststep, samples);
        break;
    case DevFmtUByte:
        StoreSampleArray<DevFmtUByte>(dst, src, dststep, samples);
        break;
    case DevFmtShort:
        StoreSampleArray<DevFmtShort>(dst, src, dststep, samples);
        break;
    case DevFmtUShort:
        StoreSampleArray<DevFmtUShort>(dst, src, dststep, samples);
        break;
    case DevFmtInt:
        StoreSampleArray<DevFmtInt>(dst, src, dststep, samples);
        break;
    case DevFmtUInt:
        StoreSampleArray<DevFmtUInt>(dst, src, dststep, samples);
        break;
    case DevFmtFloat:
        StoreSampleArray<DevFmtFloat>(dst, src, dststep, samples);
        break;
    }
}


template<DevFmtType T>
void Mono2Stereo(float *RESTRICT dst, const void *src, const size_t frames) noexcept
{
    const DevFmtType_t<T> *ssrc = static_cast<const DevFmtType_t<T>*>(src);
    for(size_t i{0u};i < frames;i++)
        dst[i*2 + 1] = dst[i*2 + 0] = LoadSample<T>(ssrc[i]) * 0.707106781187f;
}

template<DevFmtType T>
void Multi2Mono(uint chanmask, const size_t step, const float scale, float *RESTRICT dst,
    const void *src, const size_t frames) noexcept
{
    const DevFmtType_t<T> *ssrc = static_cast<const DevFmtType_t<T>*>(src);
    std::fill_n(dst, frames, 0.0f);
    for(size_t c{0};chanmask;++c)
    {
        if((chanmask&1)) LIKELY
        {
            for(size_t i{0u};i < frames;i++)
                dst[i] += LoadSample<T>(ssrc[i*step + c]);
        }
        chanmask >>= 1;
    }
    for(size_t i{0u};i < frames;i++)
        dst[i] *= scale;
}

} // namespace

/* Allocates and initializes a sample converter for `numchans` channels that
 * converts from srcType at srcRate to dstType at dstRate.
 *
 * Returns nullptr if the channel count or either sample rate is zero.
 */
SampleConverterPtr SampleConverter::Create(DevFmtType srcType, DevFmtType dstType, size_t numchans,
    uint srcRate, uint dstRate, Resampler resampler)
{
    const bool valid{numchans >= 1 && srcRate >= 1 && dstRate >= 1};
    if(!valid)
        return nullptr;

    SampleConverterPtr conv{new(FamCount(numchans)) SampleConverter{numchans}};
    conv->mSrcType = srcType;
    conv->mSrcTypeSize = BytesFromDevFmt(srcType);
    conv->mDstType = dstType;
    conv->mDstTypeSize = BytesFromDevFmt(dstType);

    /* Start with a full set of silent history samples and no fractional
     * offset.
     */
    conv->mFracOffset = 0;
    conv->mSrcPrepCount = MaxResamplerPadding;
    for(auto &chan : conv->mChan)
        std::fill(std::begin(chan.PrevSamples), std::end(chan.PrevSamples), 0.0f);

    /* Have to set the mixer FPU mode since that's what the resampler code expects. */
    FPUCtl mixer_mode{};

    /* Fixed-point step per output sample, rounded to nearest, capped at
     * MaxPitch and never allowed to be zero.
     */
    const double scaled_ratio{srcRate*double{MixerFracOne}/dstRate + 0.5};
    const auto step = static_cast<uint>(mind(scaled_ratio, MaxPitch*MixerFracOne));
    conv->mIncrement = maxu(step, 1);

    if(conv->mIncrement != MixerFracOne)
        conv->mResample = PrepareResampler(resampler, conv->mIncrement, &conv->mState);
    else
    {
        /* A step of exactly one source sample needs no interpolation; a plain
         * copy is used as the resampler.
         */
        conv->mResample = [](const InterpState*, const float *RESTRICT src, uint, const uint,
            const al::span<float> dst) { std::copy_n(src, dst.size(), dst.begin()); };
    }

    return conv;
}

/* Reports how many output frames can be produced from `srcframes` input
 * frames, given the converter's currently stored history and fractional
 * offset.
 *
 * Returns 0 when there is no input or when the input is too short to
 * complete the resampler padding; otherwise a count in [1, INT_MAX].
 */
uint SampleConverter::availableOut(uint srcframes) const
{
    if(srcframes < 1)
    {
        /* No output samples if there's no input samples. */
        return 0;
    }

    const uint prepcount{mSrcPrepCount};
    if(prepcount < MaxResamplerPadding && MaxResamplerPadding - prepcount >= srcframes)
    {
        /* Not enough input samples to generate an output sample. */
        return 0;
    }

    /* Total usable source length in fixed-point, past the padding and the
     * current fractional offset. Done in 64 bits so the shift can't overflow.
     */
    uint64_t DataSize64{prepcount};
    DataSize64 += srcframes;
    DataSize64 -= MaxResamplerPadding;
    DataSize64 <<= MixerFracBits;
    DataSize64 -= mFracOffset;

    /* If we have a full prep, we can generate at least one sample. The upper
     * bound uses INT_MAX from <limits.h>, which this file already includes,
     * rather than std::numeric_limits, whose header is not included here.
     */
    return static_cast<uint>(clampu64((DataSize64 + mIncrement-1)/mIncrement, 1, INT_MAX));
}

/* Converts and resamples interleaved input frames into interleaved output
 * frames.
 *
 * src       - in/out: pointer to the interleaved source frames in mSrcType
 *             format. On return it is advanced past every consumed frame.
 * srcframes - in/out: number of source frames available. On return it holds
 *             the number of frames that were not consumed.
 * dst       - destination for interleaved output frames in mDstType format.
 * dstframes - maximum number of output frames to write.
 *
 * Returns the number of output frames written.
 */
uint SampleConverter::convert(const void **src, uint *srcframes, void *dst, uint dstframes)
{
    const uint SrcFrameSize{static_cast<uint>(mChan.size()) * mSrcTypeSize};
    const uint DstFrameSize{static_cast<uint>(mChan.size()) * mDstTypeSize};
    const uint increment{mIncrement};
    auto SamplesIn = static_cast<const std::byte*>(*src);
    uint NumSrcSamples{*srcframes};

    FPUCtl mixer_mode{};
    uint pos{0};
    while(pos < dstframes && NumSrcSamples > 0)
    {
        const uint prepcount{mSrcPrepCount};
        const uint readable{minu(NumSrcSamples, BufferLineSize - prepcount)};

        if(prepcount < MaxResamplerPadding && MaxResamplerPadding-prepcount >= readable)
        {
            /* Not enough input samples to generate an output sample. Store
             * what we're given for later.
             */
            for(size_t chan{0u};chan < mChan.size();chan++)
                LoadSamples(&mChan[chan].PrevSamples[prepcount], SamplesIn + mSrcTypeSize*chan,
                    mChan.size(), mSrcType, readable);

            /* These frames are consumed, so step the input pointer past them
             * too. This keeps *src consistent with *srcframes on return, the
             * same way convertPlanar advances its per-channel pointers.
             */
            SamplesIn += SrcFrameSize*readable;

            mSrcPrepCount = prepcount + readable;
            NumSrcSamples = 0;
            break;
        }

        float *RESTRICT SrcData{mSrcSamples};
        float *RESTRICT DstData{mDstSamples};
        uint DataPosFrac{mFracOffset};

        /* Usable source length in fixed-point, past the padding and the
         * current fractional offset.
         */
        uint64_t DataSize64{prepcount};
        DataSize64 += readable;
        DataSize64 -= MaxResamplerPadding;
        DataSize64 <<= MixerFracBits;
        DataSize64 -= DataPosFrac;

        /* If we have a full prep, we can generate at least one sample. */
        auto DstSize = static_cast<uint>(
            clampu64((DataSize64 + increment-1)/increment, 1, BufferLineSize));
        DstSize = minu(DstSize, dstframes-pos);

        /* Fixed-point source position after producing DstSize samples, and
         * its whole-sample part.
         */
        const uint DataPosEnd{DstSize*increment + DataPosFrac};
        const uint SrcDataEnd{DataPosEnd>>MixerFracBits};

        assert(prepcount+readable >= SrcDataEnd);
        const uint nextprep{minu(prepcount + readable - SrcDataEnd, MaxResamplerPadding)};

        for(size_t chan{0u};chan < mChan.size();chan++)
        {
            /* Channels are interleaved, so each one starts at its own sample
             * offset and is strided by the channel count.
             */
            const std::byte *SrcSamples{SamplesIn + mSrcTypeSize*chan};
            std::byte *DstSamples = static_cast<std::byte*>(dst) + mDstTypeSize*chan;

            /* Load the previous samples into the source data first, then the
             * new samples from the input buffer.
             */
            std::copy_n(mChan[chan].PrevSamples, prepcount, SrcData);
            LoadSamples(SrcData + prepcount, SrcSamples, mChan.size(), mSrcType, readable);

            /* Store as many prep samples for next time as possible, given the
             * number of output samples being generated.
             */
            std::copy_n(SrcData+SrcDataEnd, nextprep, mChan[chan].PrevSamples);
            std::fill(std::begin(mChan[chan].PrevSamples)+nextprep,
                std::end(mChan[chan].PrevSamples), 0.0f);

            /* Now resample, and store the result in the output buffer. */
            mResample(&mState, SrcData+MaxResamplerEdge, DataPosFrac, increment,
                {DstData, DstSize});

            StoreSamples(DstSamples, DstData, mChan.size(), mDstType, DstSize);
        }

        /* Update the number of prep samples still available, as well as the
         * fractional offset.
         */
        mSrcPrepCount = nextprep;
        mFracOffset = DataPosEnd & MixerFracMask;

        /* Update the src and dst pointers in case there's still more to do. */
        const uint srcread{minu(NumSrcSamples, SrcDataEnd + mSrcPrepCount - prepcount)};
        SamplesIn += SrcFrameSize*srcread;
        NumSrcSamples -= srcread;

        dst = static_cast<std::byte*>(dst) + DstFrameSize*DstSize;
        pos += DstSize;
    }

    /* Report back how far the input was consumed. */
    *src = SamplesIn;
    *srcframes = NumSrcSamples;

    return pos;
}

/* Converts and resamples planar (one buffer per channel) input into planar
 * output.
 *
 * src       - in/out: array with one source pointer per channel (indexed up
 *             to mChan.size()); each pointer is advanced past the samples
 *             consumed from it.
 * srcframes - in/out: number of source frames available; on return holds the
 *             number of frames that were not consumed.
 * dst       - array with one destination pointer per channel. The pointers
 *             themselves are not modified; output is written at an offset of
 *             the frames produced so far.
 * dstframes - maximum number of output frames to write per channel.
 *
 * Returns the number of output frames written per channel.
 */
uint SampleConverter::convertPlanar(const void **src, uint *srcframes, void **dst, uint dstframes)
{
    const uint increment{mIncrement};
    uint NumSrcSamples{*srcframes};

    FPUCtl mixer_mode{};
    uint pos{0};
    while(pos < dstframes && NumSrcSamples > 0)
    {
        const uint prepcount{mSrcPrepCount};
        const uint readable{minu(NumSrcSamples, BufferLineSize - prepcount)};

        if(prepcount < MaxResamplerPadding && MaxResamplerPadding-prepcount >= readable)
        {
            /* Not enough input samples to generate an output sample. Store
             * what we're given for later.
             */
            for(size_t chan{0u};chan < mChan.size();chan++)
            {
                /* Planar data is contiguous per channel, hence a step of 1. */
                LoadSamples(&mChan[chan].PrevSamples[prepcount],
                    static_cast<const std::byte*>(src[chan]), 1, mSrcType, readable);
                src[chan] = static_cast<const std::byte*>(src[chan]) + mSrcTypeSize*readable;
            }

            mSrcPrepCount = prepcount + readable;
            NumSrcSamples = 0;
            break;
        }

        float *RESTRICT SrcData{mSrcSamples};
        float *RESTRICT DstData{mDstSamples};
        uint DataPosFrac{mFracOffset};
        /* Usable source length in fixed-point, past the padding and the
         * current fractional offset.
         */
        uint64_t DataSize64{prepcount};
        DataSize64 += readable;
        DataSize64 -= MaxResamplerPadding;
        DataSize64 <<= MixerFracBits;
        DataSize64 -= DataPosFrac;

        /* If we have a full prep, we can generate at least one sample. */
        auto DstSize = static_cast<uint>(
            clampu64((DataSize64 + increment-1)/increment, 1, BufferLineSize));
        DstSize = minu(DstSize, dstframes-pos);

        /* Fixed-point source position after producing DstSize samples, and
         * its whole-sample part.
         */
        const uint DataPosEnd{DstSize*increment + DataPosFrac};
        const uint SrcDataEnd{DataPosEnd>>MixerFracBits};

        assert(prepcount+readable >= SrcDataEnd);
        const uint nextprep{minu(prepcount + readable - SrcDataEnd, MaxResamplerPadding)};

        for(size_t chan{0u};chan < mChan.size();chan++)
        {
            /* Load the previous samples into the source data first, then the
             * new samples from the input buffer.
             */
            std::copy_n(mChan[chan].PrevSamples, prepcount, SrcData);
            LoadSamples(SrcData + prepcount, src[chan], 1, mSrcType, readable);

            /* Store as many prep samples for next time as possible, given the
             * number of output samples being generated.
             */
            std::copy_n(SrcData+SrcDataEnd, nextprep, mChan[chan].PrevSamples);
            std::fill(std::begin(mChan[chan].PrevSamples)+nextprep,
                std::end(mChan[chan].PrevSamples), 0.0f);

            /* Now resample, and store the result in the output buffer. */
            mResample(&mState, SrcData+MaxResamplerEdge, DataPosFrac, increment,
                {DstData, DstSize});

            /* Write after the frames already produced for this channel. */
            std::byte *DstSamples = static_cast<std::byte*>(dst[chan]) + pos*mDstTypeSize;
            StoreSamples(DstSamples, DstData, 1, mDstType, DstSize);
        }

        /* Update the number of prep samples still available, as well as the
         * fractional offset.
         */
        mSrcPrepCount = nextprep;
        mFracOffset = DataPosEnd & MixerFracMask;

        /* Update the src and dst pointers in case there's still more to do. */
        const uint srcread{minu(NumSrcSamples, SrcDataEnd + mSrcPrepCount - prepcount)};
        for(size_t chan{0u};chan < mChan.size();chan++)
            src[chan] = static_cast<const std::byte*>(src[chan]) + mSrcTypeSize*srcread;
        NumSrcSamples -= srcread;

        pos += DstSize;
    }

    /* Report back how many input frames were left unconsumed. */
    *srcframes = NumSrcSamples;

    return pos;
}


/* Remixes `frames` frames from `src` (in mSrcType format) into float output
 * at `dst`.
 *
 * Two remixes are handled: any channel set selected by mChanMask down to
 * mono, and a single channel (mask 0x1) up to stereo. Any other combination
 * leaves `dst` untouched.
 */
void ChannelConverter::convert(const void *src, float *dst, uint frames) const
{
    if(mDstChans == DevFmtMono)
    {
        /* Gain applied to the summed channels: sqrt(1/N) for N selected
         * channels.
         */
        const float scale{std::sqrt(1.0f / static_cast<float>(al::popcount(mChanMask)))};
        switch(mSrcType)
        {
        case DevFmtByte:
            Multi2Mono<DevFmtByte>(mChanMask, mSrcStep, scale, dst, src, frames);
            break;
        case DevFmtUByte:
            Multi2Mono<DevFmtUByte>(mChanMask, mSrcStep, scale, dst, src, frames);
            break;
        case DevFmtShort:
            Multi2Mono<DevFmtShort>(mChanMask, mSrcStep, scale, dst, src, frames);
            break;
        case DevFmtUShort:
            Multi2Mono<DevFmtUShort>(mChanMask, mSrcStep, scale, dst, src, frames);
            break;
        case DevFmtInt:
            Multi2Mono<DevFmtInt>(mChanMask, mSrcStep, scale, dst, src, frames);
            break;
        case DevFmtUInt:
            Multi2Mono<DevFmtUInt>(mChanMask, mSrcStep, scale, dst, src, frames);
            break;
        case DevFmtFloat:
            Multi2Mono<DevFmtFloat>(mChanMask, mSrcStep, scale, dst, src, frames);
            break;
        }
        return;
    }

    /* Only mono-to-stereo remains; everything else is a no-op. */
    if(mChanMask != 0x1 || mDstChans != DevFmtStereo)
        return;

    switch(mSrcType)
    {
    case DevFmtByte:
        Mono2Stereo<DevFmtByte>(dst, src, frames);
        break;
    case DevFmtUByte:
        Mono2Stereo<DevFmtUByte>(dst, src, frames);
        break;
    case DevFmtShort:
        Mono2Stereo<DevFmtShort>(dst, src, frames);
        break;
    case DevFmtUShort:
        Mono2Stereo<DevFmtUShort>(dst, src, frames);
        break;
    case DevFmtInt:
        Mono2Stereo<DevFmtInt>(dst, src, frames);
        break;
    case DevFmtUInt:
        Mono2Stereo<DevFmtUInt>(dst, src, frames);
        break;
    case DevFmtFloat:
        Mono2Stereo<DevFmtFloat>(dst, src, frames);
        break;
    }
}
Prepare 2.1.0 release * Update copyright notice * Remove prebuilt libs from SCM * Remove optional thirdparty sources/repo from SCM * Remove tests resources folder 'Content' from SCM 2023-12-08 00:13:39 +08:00
			`#include "config.h"`

			`#include "converter.h"`

			`#include <algorithm>`
			`#include <cassert>`
			`#include <cmath>`
			`#include <cstddef>`
			`#include <cstdint>`
			`#include <iterator>`
			`#include <limits.h>`

			`#include "albit.h"`
			`#include "alnumeric.h"`
			`#include "fpu_ctrl.h"`


			`namespace {`

			`constexpr uint MaxPitch{10};`

			`static_assert((BufferLineSize-1)/MaxPitch > 0, "MaxPitch is too large for BufferLineSize!");`
			`static_assert((INT_MAX>>MixerFracBits)/MaxPitch > BufferLineSize,`
			`"MaxPitch and/or BufferLineSize are too large for MixerFracBits!");`

			`/* Base template left undefined. Should be marked =delete, but Clang 3.8.1`
			`* chokes on that given the inline specializations.`
			`*/`
			`template<DevFmtType T>`
			`inline float LoadSample(DevFmtType_t<T> val) noexcept;`

			`template<> inline float LoadSample<DevFmtByte>(DevFmtType_t<DevFmtByte> val) noexcept`
			`{ return val * (1.0f/128.0f); }`
			`template<> inline float LoadSample<DevFmtShort>(DevFmtType_t<DevFmtShort> val) noexcept`
			`{ return val * (1.0f/32768.0f); }`
			`template<> inline float LoadSample<DevFmtInt>(DevFmtType_t<DevFmtInt> val) noexcept`
			`{ return static_cast<float>(val) * (1.0f/2147483648.0f); }`
			`template<> inline float LoadSample<DevFmtFloat>(DevFmtType_t<DevFmtFloat> val) noexcept`
			`{ return val; }`

			`template<> inline float LoadSample<DevFmtUByte>(DevFmtType_t<DevFmtUByte> val) noexcept`
			`{ return LoadSample<DevFmtByte>(static_cast<int8_t>(val - 128)); }`
			`template<> inline float LoadSample<DevFmtUShort>(DevFmtType_t<DevFmtUShort> val) noexcept`
			`{ return LoadSample<DevFmtShort>(static_cast<int16_t>(val - 32768)); }`
			`template<> inline float LoadSample<DevFmtUInt>(DevFmtType_t<DevFmtUInt> val) noexcept`
			`{ return LoadSample<DevFmtInt>(static_cast<int32_t>(val - 2147483648u)); }`


			`template<DevFmtType T>`
			`inline void LoadSampleArray(float RESTRICT dst, const void src, const size_t srcstep,`
			`const size_t samples) noexcept`
			`{`
			`const DevFmtType_t<T> ssrc = static_cast<const DevFmtType_t<T>>(src);`
			`for(size_t i{0u};i < samples;i++)`
			`dst[i] = LoadSample<T>(ssrc[i*srcstep]);`
			`}`

			`void LoadSamples(float dst, const void src, const size_t srcstep, const DevFmtType srctype,`
			`const size_t samples) noexcept`
			`{`
			`#define HANDLE_FMT(T) \`
			`case T: LoadSampleArray<T>(dst, src, srcstep, samples); break`
			`switch(srctype)`
			`{`
			`HANDLE_FMT(DevFmtByte);`
			`HANDLE_FMT(DevFmtUByte);`
			`HANDLE_FMT(DevFmtShort);`
			`HANDLE_FMT(DevFmtUShort);`
			`HANDLE_FMT(DevFmtInt);`
			`HANDLE_FMT(DevFmtUInt);`
			`HANDLE_FMT(DevFmtFloat);`
			`}`
			`#undef HANDLE_FMT`
			`}`


			`template<DevFmtType T>`
			`inline DevFmtType_t<T> StoreSample(float) noexcept;`

			`template<> inline float StoreSample<DevFmtFloat>(float val) noexcept`
			`{ return val; }`
			`template<> inline int32_t StoreSample<DevFmtInt>(float val) noexcept`
			`{ return fastf2i(clampf(val*2147483648.0f, -2147483648.0f, 2147483520.0f)); }`
			`template<> inline int16_t StoreSample<DevFmtShort>(float val) noexcept`
			`{ return static_cast<int16_t>(fastf2i(clampf(val*32768.0f, -32768.0f, 32767.0f))); }`
			`template<> inline int8_t StoreSample<DevFmtByte>(float val) noexcept`
			`{ return static_cast<int8_t>(fastf2i(clampf(val*128.0f, -128.0f, 127.0f))); }`

			`/* Define unsigned output variations. */`
			`template<> inline uint32_t StoreSample<DevFmtUInt>(float val) noexcept`
			`{ return static_cast<uint32_t>(StoreSample<DevFmtInt>(val)) + 2147483648u; }`
			`template<> inline uint16_t StoreSample<DevFmtUShort>(float val) noexcept`
			`{ return static_cast<uint16_t>(StoreSample<DevFmtShort>(val) + 32768); }`
			`template<> inline uint8_t StoreSample<DevFmtUByte>(float val) noexcept`
			`{ return static_cast<uint8_t>(StoreSample<DevFmtByte>(val) + 128); }`

			`template<DevFmtType T>`
			`inline void StoreSampleArray(void dst, const float RESTRICT src, const size_t dststep,`
			`const size_t samples) noexcept`
			`{`
			`DevFmtType_t<T> sdst = static_cast<DevFmtType_t<T>>(dst);`
			`for(size_t i{0u};i < samples;i++)`
			`sdst[i*dststep] = StoreSample<T>(src[i]);`
			`}`


			`void StoreSamples(void dst, const float src, const size_t dststep, const DevFmtType dsttype,`
			`const size_t samples) noexcept`
			`{`
			`#define HANDLE_FMT(T) \`
			`case T: StoreSampleArray<T>(dst, src, dststep, samples); break`
			`switch(dsttype)`
			`{`
			`HANDLE_FMT(DevFmtByte);`
			`HANDLE_FMT(DevFmtUByte);`
			`HANDLE_FMT(DevFmtShort);`
			`HANDLE_FMT(DevFmtUShort);`
			`HANDLE_FMT(DevFmtInt);`
			`HANDLE_FMT(DevFmtUInt);`
			`HANDLE_FMT(DevFmtFloat);`
			`}`
			`#undef HANDLE_FMT`
			`}`


			`template<DevFmtType T>`
			`void Mono2Stereo(float RESTRICT dst, const void src, const size_t frames) noexcept`
			`{`
			`const DevFmtType_t<T> ssrc = static_cast<const DevFmtType_t<T>>(src);`
			`for(size_t i{0u};i < frames;i++)`
			`dst[i2 + 1] = dst[i2 + 0] = LoadSample<T>(ssrc[i]) * 0.707106781187f;`
			`}`

			`template<DevFmtType T>`
			`void Multi2Mono(uint chanmask, const size_t step, const float scale, float *RESTRICT dst,`
			`const void *src, const size_t frames) noexcept`
			`{`
			`const DevFmtType_t<T> ssrc = static_cast<const DevFmtType_t<T>>(src);`
			`std::fill_n(dst, frames, 0.0f);`
			`for(size_t c{0};chanmask;++c)`
			`{`
			`if((chanmask&1)) LIKELY`
			`{`
			`for(size_t i{0u};i < frames;i++)`
			`dst[i] += LoadSample<T>(ssrc[i*step + c]);`
			`}`
			`chanmask >>= 1;`
			`}`
			`for(size_t i{0u};i < frames;i++)`
			`dst[i] *= scale;`
			`}`

			`} // namespace`

			`SampleConverterPtr SampleConverter::Create(DevFmtType srcType, DevFmtType dstType, size_t numchans,`
			`uint srcRate, uint dstRate, Resampler resampler)`
			`{`
			`if(numchans < 1 \|\| srcRate < 1 \|\| dstRate < 1)`
			`return nullptr;`

			`SampleConverterPtr converter{new(FamCount(numchans)) SampleConverter{numchans}};`
			`converter->mSrcType = srcType;`
			`converter->mDstType = dstType;`
			`converter->mSrcTypeSize = BytesFromDevFmt(srcType);`
			`converter->mDstTypeSize = BytesFromDevFmt(dstType);`

			`converter->mSrcPrepCount = MaxResamplerPadding;`
			`converter->mFracOffset = 0;`
			`for(auto &chan : converter->mChan)`
			`{`
			`const al::span<float> buffer{chan.PrevSamples};`
			`std::fill(buffer.begin(), buffer.end(), 0.0f);`
			`}`

			`/* Have to set the mixer FPU mode since that's what the resampler code expects. */`
			`FPUCtl mixer_mode{};`
			`auto step = static_cast<uint>(`
			`mind(srcRatedouble{MixerFracOne}/dstRate + 0.5, MaxPitchMixerFracOne));`
			`converter->mIncrement = maxu(step, 1);`
			`if(converter->mIncrement == MixerFracOne)`
			`converter->mResample = [](const InterpState, const float RESTRICT src, uint, const uint,`
			`const al::span<float> dst) { std::copy_n(src, dst.size(), dst.begin()); };`
			`else`
			`converter->mResample = PrepareResampler(resampler, converter->mIncrement,`
			`&converter->mState);`

			`return converter;`
			`}`

			`uint SampleConverter::availableOut(uint srcframes) const`
			`{`
			`if(srcframes < 1)`
			`{`
			`/* No output samples if there's no input samples. */`
			`return 0;`
			`}`

			`const uint prepcount{mSrcPrepCount};`
			`if(prepcount < MaxResamplerPadding && MaxResamplerPadding - prepcount >= srcframes)`
			`{`
			`/* Not enough input samples to generate an output sample. */`
			`return 0;`
			`}`

			`uint64_t DataSize64{prepcount};`
			`DataSize64 += srcframes;`
			`DataSize64 -= MaxResamplerPadding;`
			`DataSize64 <<= MixerFracBits;`
			`DataSize64 -= mFracOffset;`

			`/* If we have a full prep, we can generate at least one sample. */`
			`return static_cast<uint>(clampu64((DataSize64 + mIncrement-1)/mIncrement, 1,`
			`std::numeric_limits<int>::max()));`
			`}`

			`uint SampleConverter::convert(const void *src, uint srcframes, void *dst, uint dstframes)`
			`{`
			`const uint SrcFrameSize{static_cast<uint>(mChan.size()) * mSrcTypeSize};`
			`const uint DstFrameSize{static_cast<uint>(mChan.size()) * mDstTypeSize};`
			`const uint increment{mIncrement};`
			`auto SamplesIn = static_cast<const std::byte>(src);`
			`uint NumSrcSamples{*srcframes};`

			`FPUCtl mixer_mode{};`
			`uint pos{0};`
			`while(pos < dstframes && NumSrcSamples > 0)`
			`{`
			`const uint prepcount{mSrcPrepCount};`
			`const uint readable{minu(NumSrcSamples, BufferLineSize - prepcount)};`

			`if(prepcount < MaxResamplerPadding && MaxResamplerPadding-prepcount >= readable)`
			`{`
			`/* Not enough input samples to generate an output sample. Store`
			`* what we're given for later.`
			`*/`
			`for(size_t chan{0u};chan < mChan.size();chan++)`
			`LoadSamples(&mChan[chan].PrevSamples[prepcount], SamplesIn + mSrcTypeSize*chan,`
			`mChan.size(), mSrcType, readable);`

			`mSrcPrepCount = prepcount + readable;`
			`NumSrcSamples = 0;`
			`break;`
			`}`

			`float *RESTRICT SrcData{mSrcSamples};`
			`float *RESTRICT DstData{mDstSamples};`
			`uint DataPosFrac{mFracOffset};`
			`uint64_t DataSize64{prepcount};`
			`DataSize64 += readable;`
			`DataSize64 -= MaxResamplerPadding;`
			`DataSize64 <<= MixerFracBits;`
			`DataSize64 -= DataPosFrac;`

			`/* If we have a full prep, we can generate at least one sample. */`
			`auto DstSize = static_cast<uint>(`
			`clampu64((DataSize64 + increment-1)/increment, 1, BufferLineSize));`
			`DstSize = minu(DstSize, dstframes-pos);`

			`const uint DataPosEnd{DstSize*increment + DataPosFrac};`
			`const uint SrcDataEnd{DataPosEnd>>MixerFracBits};`

			`assert(prepcount+readable >= SrcDataEnd);`
			`const uint nextprep{minu(prepcount + readable - SrcDataEnd, MaxResamplerPadding)};`

			`for(size_t chan{0u};chan < mChan.size();chan++)`
			`{`
			`const std::byte SrcSamples{SamplesIn + mSrcTypeSizechan};`
			`std::byte DstSamples = static_cast<std::byte>(dst) + mDstTypeSize*chan;`

			`/* Load the previous samples into the source data first, then the`
			`* new samples from the input buffer.`
			`*/`
			`std::copy_n(mChan[chan].PrevSamples, prepcount, SrcData);`
			`LoadSamples(SrcData + prepcount, SrcSamples, mChan.size(), mSrcType, readable);`

			`/* Store as many prep samples for next time as possible, given the`
			`* number of output samples being generated.`
			`*/`
			`std::copy_n(SrcData+SrcDataEnd, nextprep, mChan[chan].PrevSamples);`
			`std::fill(std::begin(mChan[chan].PrevSamples)+nextprep,`
			`std::end(mChan[chan].PrevSamples), 0.0f);`

			`/* Now resample, and store the result in the output buffer. */`
			`mResample(&mState, SrcData+MaxResamplerEdge, DataPosFrac, increment,`
			`{DstData, DstSize});`

			`StoreSamples(DstSamples, DstData, mChan.size(), mDstType, DstSize);`
			`}`

			`/* Update the number of prep samples still available, as well as the`
			`* fractional offset.`
			`*/`
			`mSrcPrepCount = nextprep;`
			`mFracOffset = DataPosEnd & MixerFracMask;`

			`/* Update the src and dst pointers in case there's still more to do. */`
			`const uint srcread{minu(NumSrcSamples, SrcDataEnd + mSrcPrepCount - prepcount)};`
			`SamplesIn += SrcFrameSize*srcread;`
			`NumSrcSamples -= srcread;`

			`dst = static_cast<std::byte>(dst) + DstFrameSizeDstSize;`
			`pos += DstSize;`
			`}`

			`*src = SamplesIn;`
			`*srcframes = NumSrcSamples;`

			`return pos;`
			`}`

			`uint SampleConverter::convertPlanar(const void *src, uint srcframes, void **dst, uint dstframes)`
			`{`
			`const uint increment{mIncrement};`
			`uint NumSrcSamples{*srcframes};`

			`FPUCtl mixer_mode{};`
			`uint pos{0};`
			`while(pos < dstframes && NumSrcSamples > 0)`
			`{`
			`const uint prepcount{mSrcPrepCount};`
			`const uint readable{minu(NumSrcSamples, BufferLineSize - prepcount)};`

			`if(prepcount < MaxResamplerPadding && MaxResamplerPadding-prepcount >= readable)`
			`{`
			`/* Not enough input samples to generate an output sample. Store`
			`* what we're given for later.`
			`*/`
			`for(size_t chan{0u};chan < mChan.size();chan++)`
			`{`
			`LoadSamples(&mChan[chan].PrevSamples[prepcount],`
			`static_cast<const std::byte*>(src[chan]), 1, mSrcType, readable);`
			`src[chan] = static_cast<const std::byte>(src[chan]) + mSrcTypeSizereadable;`
			`}`

			`mSrcPrepCount = prepcount + readable;`
			`NumSrcSamples = 0;`
			`break;`
			`}`

			`float *RESTRICT SrcData{mSrcSamples};`
			`float *RESTRICT DstData{mDstSamples};`
			`uint DataPosFrac{mFracOffset};`
			`uint64_t DataSize64{prepcount};`
			`DataSize64 += readable;`
			`DataSize64 -= MaxResamplerPadding;`
			`DataSize64 <<= MixerFracBits;`
			`DataSize64 -= DataPosFrac;`

			`/* If we have a full prep, we can generate at least one sample. */`
			`auto DstSize = static_cast<uint>(`
			`clampu64((DataSize64 + increment-1)/increment, 1, BufferLineSize));`
			`DstSize = minu(DstSize, dstframes-pos);`

			`const uint DataPosEnd{DstSize*increment + DataPosFrac};`
			`const uint SrcDataEnd{DataPosEnd>>MixerFracBits};`

			`assert(prepcount+readable >= SrcDataEnd);`
			`const uint nextprep{minu(prepcount + readable - SrcDataEnd, MaxResamplerPadding)};`

			`for(size_t chan{0u};chan < mChan.size();chan++)`
			`{`
			`/* Load the previous samples into the source data first, then the`
			`* new samples from the input buffer.`
			`*/`
			`std::copy_n(mChan[chan].PrevSamples, prepcount, SrcData);`
			`LoadSamples(SrcData + prepcount, src[chan], 1, mSrcType, readable);`

			`/* Store as many prep samples for next time as possible, given the`
			`* number of output samples being generated.`
			`*/`
			`std::copy_n(SrcData+SrcDataEnd, nextprep, mChan[chan].PrevSamples);`
			`std::fill(std::begin(mChan[chan].PrevSamples)+nextprep,`
			`std::end(mChan[chan].PrevSamples), 0.0f);`

			`/* Now resample, and store the result in the output buffer. */`
			`mResample(&mState, SrcData+MaxResamplerEdge, DataPosFrac, increment,`
			`{DstData, DstSize});`

			`std::byte DstSamples = static_cast<std::byte>(dst[chan]) + pos*mDstTypeSize;`
			`StoreSamples(DstSamples, DstData, 1, mDstType, DstSize);`
			`}`

			`/* Update the number of prep samples still available, as well as the`
			`* fractional offset.`
			`*/`
			`mSrcPrepCount = nextprep;`
			`mFracOffset = DataPosEnd & MixerFracMask;`

			`/* Update the src and dst pointers in case there's still more to do. */`
			`const uint srcread{minu(NumSrcSamples, SrcDataEnd + mSrcPrepCount - prepcount)};`
			`for(size_t chan{0u};chan < mChan.size();chan++)`
			`src[chan] = static_cast<const std::byte>(src[chan]) + mSrcTypeSizesrcread;`
			`NumSrcSamples -= srcread;`

			`pos += DstSize;`
			`}`

			`*srcframes = NumSrcSamples;`

			`return pos;`
			`}`


			`void ChannelConverter::convert(const void src, float dst, uint frames) const`
			`{`
			`if(mDstChans == DevFmtMono)`
			`{`
			`const float scale{std::sqrt(1.0f / static_cast<float>(al::popcount(mChanMask)))};`
			`switch(mSrcType)`
			`{`
			`#define HANDLE_FMT(T) case T: Multi2Mono<T>(mChanMask, mSrcStep, scale, dst, src, frames); break`
			`HANDLE_FMT(DevFmtByte);`
			`HANDLE_FMT(DevFmtUByte);`
			`HANDLE_FMT(DevFmtShort);`
			`HANDLE_FMT(DevFmtUShort);`
			`HANDLE_FMT(DevFmtInt);`
			`HANDLE_FMT(DevFmtUInt);`
			`HANDLE_FMT(DevFmtFloat);`
			`#undef HANDLE_FMT`
			`}`
			`}`
			`else if(mChanMask == 0x1 && mDstChans == DevFmtStereo)`
			`{`
			`switch(mSrcType)`
			`{`
			`#define HANDLE_FMT(T) case T: Mono2Stereo<T>(dst, src, frames); break`
			`HANDLE_FMT(DevFmtByte);`
			`HANDLE_FMT(DevFmtUByte);`
			`HANDLE_FMT(DevFmtShort);`
			`HANDLE_FMT(DevFmtUShort);`
			`HANDLE_FMT(DevFmtInt);`
			`HANDLE_FMT(DevFmtUInt);`
			`HANDLE_FMT(DevFmtFloat);`
			`#undef HANDLE_FMT`
			`}`
			`}`
			`}`