t-mat · April 29, 2022 10:29
diff --git a/WasapiCapture.cpp b/WasapiCapture.cpp
 // Capture WASAPI audio endpoint
 // https://docs.microsoft.com/en-us/windows/desktop/coreaudio/capturing-a-stream

 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 #include <mmdeviceapi.h>
 #include <audioclient.h>
 #include <Functiondiscoverykeys_devpkey.h>  // PKEY_Device_FriendlyName
 #include <fcntl.h>                          // _O_U16TEXT
 #include <io.h>
 #include <assert.h>
 #include <stdio.h>                          // wprintf, fwprintf, _fileno
 #include <cmath>                            // std::sqrt
 #include <functional>
 #include <string>                           // std::wstring
 #include <tuple>                            // std::tuple, std::tie

 class WasapiCapturer {
 public:
    static const int DefaultDeviceIndex = -1;

    WasapiCapturer() {}
    ~WasapiCapturer() {
        close();
    }

    bool open(int deviceIndex = DefaultDeviceIndex) {
        bool result = false;
        WAVEFORMATEX* wfex = nullptr;
        HRESULT hr = S_OK;

        hr = GetMmDevice(deviceIndex, &mmDevice);
        if(FAILED(hr)) { goto end; }

        hr = mmDevice->Activate(
              __uuidof(IAudioClient)
            , CLSCTX_ALL
            , nullptr
            , (void**)&pAudioClient
        );
        if(FAILED(hr)) { goto end; }

        hr = pAudioClient->GetMixFormat(&wfex);
        if(FAILED(hr)) { goto end; }

        wfFormatTag = wfex->wFormatTag;
        wfChannels = wfex->nChannels;
        wfSamplesPerSec = wfex->nChannels;
        wfBitsPerSample = wfex->wBitsPerSample;
        if(wfex->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
            const auto* wfx = reinterpret_cast<WAVEFORMATEXTENSIBLE*>(wfex);
            if(wfx->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT) {
                wfFloat = true;
            }
        }

        hr = pAudioClient->Initialize(
              AUDCLNT_SHAREMODE_SHARED
            , 0 | AUDCLNT_STREAMFLAGS_LOOPBACK
            , static_cast<int>(10000000.0 * 0.1)  // 0.1 sec
            , 0
            , wfex
            , nullptr
        );
        if(FAILED(hr)) { goto end; }

        hr = pAudioClient->GetService(
              __uuidof(IAudioCaptureClient)
            , (void**)&pCaptureClient
        );
        if(FAILED(hr)) { goto end; }

        hr = pAudioClient->Start();
        if(FAILED(hr)) { goto end; }

        result = true;

    end:
        if(wfex) {
            CoTaskMemFree(wfex);
            wfex = nullptr;
        }
        if(!result) {
            close();
        }
        return result;
    }

    void close() {
        if(pAudioClient) {
            pAudioClient->Stop();
        }

        SafeRelease(pCaptureClient);
        SafeRelease(pAudioClient);
        SafeRelease(mmDevice);
    }

    IMMDevice* getMmDevice() const {
        return mmDevice;
    }

    using GetDataCallback = std::function<
        void(
              const char* data
            , int numFrames
            , int bytesPerFrame
            , bool isFloatSample
        )
    >;

    void getData(const GetDataCallback& callback) {
        HRESULT hr = S_OK;

        UINT packetLength = 0;
        hr = pCaptureClient->GetNextPacketSize(&packetLength);
        if(FAILED(hr)) { goto end; }

        while(packetLength != 0) {
            BYTE* pData = nullptr;
            UINT numFramesAvailable = 0;
            DWORD flags = 0;
            hr = pCaptureClient->GetBuffer(
                  &pData
                , &numFramesAvailable
                , &flags
                , nullptr
                , nullptr
            );
            if(FAILED(hr)) { break; }

            if(flags & AUDCLNT_BUFFERFLAGS_SILENT) {
                pData = nullptr;
            }

            if(pData) {
                callback(
                      reinterpret_cast<char*>(pData)
                    , numFramesAvailable
                    , (wfBitsPerSample / 8) * wfChannels
                    , wfFloat
                );
            }
            hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
            if(FAILED(hr)) { break; }

            hr = pCaptureClient->GetNextPacketSize(&packetLength);
            if(FAILED(hr)) { break; }
        }

    end:
        ;
    }

    static std::tuple<std::wstring, std::wstring> getDeviceInfo(
        IMMDevice* mmDevice
    ) {
        std::wstring wsDeviceId;
        std::wstring wsDeviceName;

        HRESULT hr = S_OK;
        IPropertyStore* propertyStore = nullptr;

        WCHAR* deviceId = nullptr;
        hr = mmDevice->GetId(&deviceId);
        if(FAILED(hr)) goto end;

        hr = mmDevice->OpenPropertyStore(STGM_READ, &propertyStore);
        if(FAILED(hr)) goto end;

        PROPVARIANT deviceName;
        PropVariantInit(&deviceName);
        hr = propertyStore->GetValue(PKEY_Device_FriendlyName, &deviceName);
        if(FAILED(hr)) goto end;

        wsDeviceId = deviceId;
        wsDeviceName = deviceName.pwszVal;

    end:
        SafeRelease(propertyStore);
        if(deviceId) {
            CoTaskMemFree(deviceId);
            deviceId = nullptr;
        }

        return std::make_tuple(wsDeviceId, wsDeviceName);
    }

    using EnumAudioEndpointsCallback = std::function<
        void(int deviceIndex, IMMDevice* mmDevice)
    >;

    static HRESULT enumAudioEndpoints(
        const EnumAudioEndpointsCallback& callback
    ) {
        const EDataFlow dataFlow = eRender;
        const ERole role = eConsole;
        const DWORD dwStateMask = DEVICE_STATE_ACTIVE;

        HRESULT hr = S_OK;

        IMMDeviceEnumerator* mmDeviceEnumerator = nullptr;
        IMMDeviceCollection* mmDeviceCollection = nullptr;

        hr = CoCreateInstance(
              __uuidof(MMDeviceEnumerator)
            , nullptr
            , CLSCTX_INPROC_SERVER
            , IID_PPV_ARGS(&mmDeviceEnumerator)
        );
        if(FAILED(hr)) goto end;

        hr = mmDeviceEnumerator->EnumAudioEndpoints(
              dataFlow
            , dwStateMask
            , &mmDeviceCollection
        );
        if(FAILED(hr)) goto end;

        {
            UINT nCount = 0;
            hr = mmDeviceCollection->GetCount(&nCount);
            if(FAILED(hr)) goto end;

            for(UINT iCount = 0; iCount < nCount; ++iCount) {
                IMMDevice* mmDevice = nullptr;
                hr = mmDeviceCollection->Item(iCount, &mmDevice);
                if(SUCCEEDED(hr)) {
                    callback(static_cast<int>(iCount), mmDevice);
                }
                SafeRelease(mmDevice);
            }
        }

    end:
        SafeRelease(mmDeviceCollection);
        SafeRelease(mmDeviceEnumerator);
        return hr;
    }

    static HRESULT GetMmDevice(int deviceIndex, IMMDevice** ppMmDevice) {
        const EDataFlow dataFlow = eRender;
        const ERole role = eConsole;
        const DWORD dwStateMask = DEVICE_STATE_ACTIVE;

        HRESULT hr = S_OK;
        *ppMmDevice = nullptr;

        IMMDeviceEnumerator* mmDeviceEnumerator = nullptr;
        IMMDeviceCollection* mmDeviceCollection = nullptr;

        hr = CoCreateInstance(
              __uuidof(MMDeviceEnumerator)
            , nullptr
            , CLSCTX_INPROC_SERVER
            , IID_PPV_ARGS(&mmDeviceEnumerator)
        );
        if(FAILED(hr)) goto end;

        if(deviceIndex < 0) {
            hr = mmDeviceEnumerator->GetDefaultAudioEndpoint(
                  dataFlow
                , role
                , ppMmDevice
            );
            if(FAILED(hr)) goto end;
        } else {
            hr = mmDeviceEnumerator->EnumAudioEndpoints(
                  dataFlow
                , dwStateMask
                , &mmDeviceCollection
            );
            if(FAILED(hr)) goto end;

            hr = mmDeviceCollection->Item(
                  static_cast<UINT>(deviceIndex)
                , ppMmDevice
            );
            if(FAILED(hr)) goto end;
        }

    end:
        SafeRelease(mmDeviceCollection);
        SafeRelease(mmDeviceEnumerator);
        return hr;
    }


    IMMDevice* mmDevice = nullptr;
    IAudioClient* pAudioClient = nullptr;
    IAudioCaptureClient* pCaptureClient = nullptr;

    int wfFormatTag = 0;
    int wfChannels = 0;
    int wfSamplesPerSec = 0;
    int wfBitsPerSample = 0;
    bool wfFloat = false;

 private:
    template<typename T>
    static void SafeRelease(T& p) {
        if(p) {
            p->Release();
            p = nullptr;
        }
    }
 };


 void test_EnumEndpoints() {
    WasapiCapturer::enumAudioEndpoints([&](
          int deviceIndex
        , IMMDevice* mmDevice
    ) {
        std::wstring deviceId;
        std::wstring deviceName;

        std::tie(deviceId, deviceName)
            = WasapiCapturer::getDeviceInfo(mmDevice);

        wprintf(L"deviceIndex = %d\n", deviceIndex);
        wprintf(L"    deviceId   : <%s>\n", deviceId.c_str());
        wprintf(L"    deviceName : <%s>\n", deviceName.c_str());
    });
    wprintf(L"\n");
 }


 /// Opens a loopback capture and draws a text level meter on one console
 /// line until the ESCAPE key is pressed.
 ///
 /// The meter shows the RMS of the first channel of the loudest packet read
 /// in each polling pass. Packets that are not float samples leave the meter
 /// empty.
 void test_AudioCapture() {
    // You can set other device index value to deviceIndex to capture
    // specific device.
    // This device index value is same index value which you can retrieve
    // from first argument of WasapiCapturer::enumAudioEndpoints().
    int deviceIndex = WasapiCapturer::DefaultDeviceIndex;
    WasapiCapturer wasapi;
    if(!wasapi.open(deviceIndex)) {
        // Without this check a failed open would hand a null device to
        // getDeviceInfo() and poll a closed capturer.
        wprintf(L"Failed to open capture device #%d\n", deviceIndex);
        return;
    }

    {
        std::wstring deviceId;
        std::wstring deviceName;
        std::tie(deviceId, deviceName)
            = WasapiCapturer::getDeviceInfo(wasapi.getMmDevice());
        wprintf(L"Capturing(#%d, <%s>)\n", deviceIndex, deviceName.c_str());
    }

    wprintf(L"Press ESCAPE key to exit\n");

    const int maxBarLength = 32;

    bool bLoop = true;
    while(bLoop) {
        Sleep(10);
        if(GetAsyncKeyState(VK_ESCAPE) != 0) {
            bLoop = false;
        }

        int barLength = 0;

        wasapi.getData([&](
              const char* pData
            , int numFrames
            , int bytesPerFrame
            , bool isFloatSample
        ) {
            // Only float packets are metered; also skip empty packets (would
            // divide by zero) and frames too small to hold one float.
            if(!isFloatSample
                || numFrames <= 0
                || bytesPerFrame < static_cast<int>(sizeof(float))
            ) {
                return;
            }

            // Calculate RMS of the first channel. Other channels are not
            // read, so mono streams are never read past their frame.
            float sumSquares = 0.0f;
            for(int iFrame = 0; iFrame < numFrames; ++iFrame) {
                const float l = *reinterpret_cast<const float*>(
                    pData + bytesPerFrame * iFrame
                );
                sumSquares += l * l;
            }
            const float rms = std::sqrt(sumSquares / numFrames);

            const int bl = static_cast<int>(maxBarLength * rms);
            if(bl > barLength) {
                barLength = bl;
            }
        });

        // Build the whole line first so the meter is redrawn in one call.
        wchar_t bar[maxBarLength + 1];
        for(int i = 0; i < maxBarLength; ++i) {
            bar[i] = (i < barLength) ? L'#' : L' ';
        }
        bar[maxBarLength] = L'\0';
        wprintf(L"\r%s", bar);
    }
    wasapi.close();
 }


 int main() {
    OleInitialize(nullptr);
    CoInitializeEx(nullptr, COINIT_MULTITHREADED);
    const auto prevConsoleMode = _setmode(_fileno(stdout), _O_U16TEXT);

    test_EnumEndpoints();
    test_AudioCapture();

    _setmode(_fileno(stdout), prevConsoleMode);
    CoUninitialize();
 }
	// Capture WASAPI audio endpoint
	// https://docs.microsoft.com/en-us/windows/desktop/coreaudio/capturing-a-stream

	#define WIN32_LEAN_AND_MEAN
	#include <windows.h>
	#include <mmdeviceapi.h>
	#include <audioclient.h>
	#include <Functiondiscoverykeys_devpkey.h> // PKEY_Device_FriendlyName
	#include <fcntl.h> // _O_U16TEXT
	#include <io.h>
	#include <assert.h>
	#include <functional>

	class WasapiCapturer {
	public:
	static const int DefaultDeviceIndex = -1;

	WasapiCapturer() {}
	~WasapiCapturer() {
	close();
	}

	bool open(int deviceIndex = DefaultDeviceIndex) {
	bool result = false;
	WAVEFORMATEX* wfex = nullptr;
	HRESULT hr = S_OK;

	hr = GetMmDevice(deviceIndex, &mmDevice);
	if(FAILED(hr)) { goto end; }

	hr = mmDevice->Activate(
	__uuidof(IAudioClient)
	, CLSCTX_ALL
	, nullptr
	, (void**)&pAudioClient
	);
	if(FAILED(hr)) { goto end; }

	hr = pAudioClient->GetMixFormat(&wfex);
	if(FAILED(hr)) { goto end; }

	wfFormatTag = wfex->wFormatTag;
	wfChannels = wfex->nChannels;
	wfSamplesPerSec = wfex->nChannels;
	wfBitsPerSample = wfex->wBitsPerSample;
	if(wfex->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
	const auto* wfx = reinterpret_cast<WAVEFORMATEXTENSIBLE*>(wfex);
	if(wfx->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT) {
	wfFloat = true;
	}
	}

	hr = pAudioClient->Initialize(
	AUDCLNT_SHAREMODE_SHARED
	, 0 \| AUDCLNT_STREAMFLAGS_LOOPBACK
	, static_cast<int>(10000000.0 * 0.1) // 0.1 sec
	, 0
	, wfex
	, nullptr
	);
	if(FAILED(hr)) { goto end; }

	hr = pAudioClient->GetService(
	__uuidof(IAudioCaptureClient)
	, (void**)&pCaptureClient
	);
	if(FAILED(hr)) { goto end; }

	hr = pAudioClient->Start();
	if(FAILED(hr)) { goto end; }

	result = true;

	end:
	if(wfex) {
	CoTaskMemFree(wfex);
	wfex = nullptr;
	}
	if(!result) {
	close();
	}
	return result;
	}

	void close() {
	if(pAudioClient) {
	pAudioClient->Stop();
	}

	SafeRelease(pCaptureClient);
	SafeRelease(pAudioClient);
	SafeRelease(mmDevice);
	}

	IMMDevice* getMmDevice() const {
	return mmDevice;
	}

	using GetDataCallback = std::function<
	void(
	const char* data
	, int numFrames
	, int bytesPerFrame
	, bool isFloatSample
	)
	>;

	void getData(const GetDataCallback& callback) {
	HRESULT hr = S_OK;

	UINT packetLength = 0;
	hr = pCaptureClient->GetNextPacketSize(&packetLength);
	if(FAILED(hr)) { goto end; }

	while(packetLength != 0) {
	BYTE* pData = nullptr;
	UINT numFramesAvailable = 0;
	DWORD flags = 0;
	hr = pCaptureClient->GetBuffer(
	&pData
	, &numFramesAvailable
	, &flags
	, nullptr
	, nullptr
	);
	if(FAILED(hr)) { break; }

	if(flags & AUDCLNT_BUFFERFLAGS_SILENT) {
	pData = nullptr;
	}

	if(pData) {
	callback(
	reinterpret_cast<char*>(pData)
	, numFramesAvailable
	, (wfBitsPerSample / 8) * wfChannels
	, wfFloat
	);
	}
	hr = pCaptureClient->ReleaseBuffer(numFramesAvailable);
	if(FAILED(hr)) { break; }

	hr = pCaptureClient->GetNextPacketSize(&packetLength);
	if(FAILED(hr)) { break; }
	}

	end:
	;
	}

	static std::tuple<std::wstring, std::wstring> getDeviceInfo(
	IMMDevice* mmDevice
	) {
	std::wstring wsDeviceId;
	std::wstring wsDeviceName;

	HRESULT hr = S_OK;
	IPropertyStore* propertyStore = nullptr;

	WCHAR* deviceId = nullptr;
	hr = mmDevice->GetId(&deviceId);
	if(FAILED(hr)) goto end;

	hr = mmDevice->OpenPropertyStore(STGM_READ, &propertyStore);
	if(FAILED(hr)) goto end;

	PROPVARIANT deviceName;
	PropVariantInit(&deviceName);
	hr = propertyStore->GetValue(PKEY_Device_FriendlyName, &deviceName);
	if(FAILED(hr)) goto end;

	wsDeviceId = deviceId;
	wsDeviceName = deviceName.pwszVal;

	end:
	SafeRelease(propertyStore);
	if(deviceId) {
	CoTaskMemFree(deviceId);
	deviceId = nullptr;
	}

	return std::make_tuple(wsDeviceId, wsDeviceName);
	}

	using EnumAudioEndpointsCallback = std::function<
	void(int deviceIndex, IMMDevice* mmDevice)
	>;

	static HRESULT enumAudioEndpoints(
	const EnumAudioEndpointsCallback& callback
	) {
	const EDataFlow dataFlow = eRender;
	const ERole role = eConsole;
	const DWORD dwStateMask = DEVICE_STATE_ACTIVE;

	HRESULT hr = S_OK;

	IMMDeviceEnumerator* mmDeviceEnumerator = nullptr;
	IMMDeviceCollection* mmDeviceCollection = nullptr;

	hr = CoCreateInstance(
	__uuidof(MMDeviceEnumerator)
	, nullptr
	, CLSCTX_INPROC_SERVER
	, IID_PPV_ARGS(&mmDeviceEnumerator)
	);
	if(FAILED(hr)) goto end;

	hr = mmDeviceEnumerator->EnumAudioEndpoints(
	dataFlow
	, dwStateMask
	, &mmDeviceCollection
	);
	if(FAILED(hr)) goto end;

	{
	UINT nCount = 0;
	hr = mmDeviceCollection->GetCount(&nCount);
	if(FAILED(hr)) goto end;

	for(UINT iCount = 0; iCount < nCount; ++iCount) {
	IMMDevice* mmDevice = nullptr;
	hr = mmDeviceCollection->Item(iCount, &mmDevice);
	if(SUCCEEDED(hr)) {
	callback(static_cast<int>(iCount), mmDevice);
	}
	SafeRelease(mmDevice);
	}
	}

	end:
	SafeRelease(mmDeviceCollection);
	SafeRelease(mmDeviceEnumerator);
	return hr;
	}

	static HRESULT GetMmDevice(int deviceIndex, IMMDevice** ppMmDevice) {
	const EDataFlow dataFlow = eRender;
	const ERole role = eConsole;
	const DWORD dwStateMask = DEVICE_STATE_ACTIVE;

	HRESULT hr = S_OK;
	*ppMmDevice = nullptr;

	IMMDeviceEnumerator* mmDeviceEnumerator = nullptr;
	IMMDeviceCollection* mmDeviceCollection = nullptr;

	hr = CoCreateInstance(
	__uuidof(MMDeviceEnumerator)
	, nullptr
	, CLSCTX_INPROC_SERVER
	, IID_PPV_ARGS(&mmDeviceEnumerator)
	);
	if(FAILED(hr)) goto end;

	if(deviceIndex < 0) {
	hr = mmDeviceEnumerator->GetDefaultAudioEndpoint(
	dataFlow
	, role
	, ppMmDevice
	);
	if(FAILED(hr)) goto end;
	} else {
	hr = mmDeviceEnumerator->EnumAudioEndpoints(
	dataFlow
	, dwStateMask
	, &mmDeviceCollection
	);
	if(FAILED(hr)) goto end;

	hr = mmDeviceCollection->Item(
	static_cast<UINT>(deviceIndex)
	, ppMmDevice
	);
	if(FAILED(hr)) goto end;
	}

	end:
	SafeRelease(mmDeviceCollection);
	SafeRelease(mmDeviceEnumerator);
	return hr;
	}


	IMMDevice* mmDevice = nullptr;
	IAudioClient* pAudioClient = nullptr;
	IAudioCaptureClient* pCaptureClient = nullptr;

	int wfFormatTag = 0;
	int wfChannels = 0;
	int wfSamplesPerSec = 0;
	int wfBitsPerSample = 0;
	bool wfFloat = false;

	private:
	template<typename T>
	static void SafeRelease(T& p) {
	if(p) {
	p->Release();
	p = nullptr;
	}
	}
	};


	void test_EnumEndpoints() {
	WasapiCapturer::enumAudioEndpoints([&](
	int deviceIndex
	, IMMDevice* mmDevice
	) {
	std::wstring deviceId;
	std::wstring deviceName;

	std::tie(deviceId, deviceName)
	= WasapiCapturer::getDeviceInfo(mmDevice);

	wprintf(L"deviceIndex = %d\n", deviceIndex);
	wprintf(L" deviceId : <%s>\n", deviceId.c_str());
	wprintf(L" deviceName : <%s>\n", deviceName.c_str());
	});
	wprintf(L"\n");
	}


	/// Opens a loopback capture and draws a text level meter on one console
	/// line until the ESCAPE key is pressed.
	///
	/// The meter shows the RMS of the first channel of the loudest packet read
	/// in each polling pass. Packets that are not float samples leave the meter
	/// empty.
	void test_AudioCapture() {
		// You can set other device index value to deviceIndex to capture
		// specific device.
		// This device index value is same index value which you can retrieve
		// from first argument of WasapiCapturer::enumAudioEndpoints().
		int deviceIndex = WasapiCapturer::DefaultDeviceIndex;
		WasapiCapturer wasapi;
		if(!wasapi.open(deviceIndex)) {
			// Without this check a failed open would hand a null device to
			// getDeviceInfo() and poll a closed capturer.
			wprintf(L"Failed to open capture device #%d\n", deviceIndex);
			return;
		}

		{
			std::wstring deviceId;
			std::wstring deviceName;
			std::tie(deviceId, deviceName)
				= WasapiCapturer::getDeviceInfo(wasapi.getMmDevice());
			wprintf(L"Capturing(#%d, <%s>)\n", deviceIndex, deviceName.c_str());
		}

		wprintf(L"Press ESCAPE key to exit\n");

		const int maxBarLength = 32;

		bool bLoop = true;
		while(bLoop) {
			Sleep(10);
			if(GetAsyncKeyState(VK_ESCAPE) != 0) {
				bLoop = false;
			}

			int barLength = 0;

			wasapi.getData([&](
				  const char* pData
				, int numFrames
				, int bytesPerFrame
				, bool isFloatSample
			) {
				// Only float packets are metered; also skip empty packets (would
				// divide by zero) and frames too small to hold one float.
				if(!isFloatSample
					|| numFrames <= 0
					|| bytesPerFrame < static_cast<int>(sizeof(float))
				) {
					return;
				}

				// Calculate RMS of the first channel. Other channels are not
				// read, so mono streams are never read past their frame.
				float sumSquares = 0.0f;
				for(int iFrame = 0; iFrame < numFrames; ++iFrame) {
					const float l = *reinterpret_cast<const float*>(
						pData + bytesPerFrame * iFrame
					);
					sumSquares += l * l;
				}
				const float rms = std::sqrt(sumSquares / numFrames);

				const int bl = static_cast<int>(maxBarLength * rms);
				if(bl > barLength) {
					barLength = bl;
				}
			});

			// Build the whole line first so the meter is redrawn in one call.
			wchar_t bar[maxBarLength + 1];
			for(int i = 0; i < maxBarLength; ++i) {
				bar[i] = (i < barLength) ? L'#' : L' ';
			}
			bar[maxBarLength] = L'\0';
			wprintf(L"\r%s", bar);
		}
		wasapi.close();
	}


	int main() {
	OleInitialize(nullptr);
	CoInitializeEx(nullptr, COINIT_MULTITHREADED);
	const auto prevConsoleMode = _setmode(_fileno(stdout), _O_U16TEXT);

	test_EnumEndpoints();
	test_AudioCapture();

	_setmode(_fileno(stdout), prevConsoleMode);
	CoUninitialize();
	}