Commit 12721e92 authored by Thomas Biberger's avatar Thomas Biberger
Browse files

update combine_binQ_OPM.m,Example_combAudioQual.m, and add .gitignore

parent 425dfa65
*.asv
*.m~
*.DS_Store
function binQ = BAMQ(mRef, mTest, fs)
%BAMQ calculate perceptual binaural difference between mRef and mTest
% -------------------------------------------------------------------------
%
% Usage: binQ = BAMQ(mRef, mTest, fs)
%
% Input: mRef ... reference signal matrix (Nx2)
% mTest ... aligned test signal matrix (Nx2)
% fs ... sampling frequency (files are resampled to 44.1 kHz)
%
% 0 dB FS should correspond to 115 dB SPL
%
% Output: binQ ... binaural perceptual quality value
%
% Values: 100 ... no difference
% 0 ... obvious difference
% -X ... even bigger difference
%
%
% Copyright (C) University of Oldenburg and HrTech 2012-2015
% Author : flja
% Date : 12-02-2019
funcFile = mfilename('fullpath');
pathstr = fileparts(funcFile);
addpath([pathstr filesep 'functions'])
if nargin < 3,
error('Not enough input arguments.');
end
if fs ~= 44100
mRef = resample(mRef,44100,fs);
mTest = resample(mTest,44100,fs);
fs = 44100;
end
if size(mRef,1) < fs*0.4,
error('signals need to be at least 0.4 s long')
end
% avoid NaNs in IVS calculation:
mRef = [0.00001,0.00001;mRef(1:end-1,:)];
mTest = [0.00001,0.00001;mTest(1:end-1,:)];
%% FRONT END: modified Dietz model
[mITDref, mIVSref, mILDref, mPowRef1, mPowRef2] = BAMQFrontEnd(mRef,fs);
[mITDtest, mIVStest, mILDtest, mPowTest1, mPowTest2, fc, fsNew] = BAMQFrontEnd(mTest,fs);
%% BACK END
binQ = BAMQBackEnd(mITDref, mIVSref, mILDref, mPowRef1, mPowRef2, ...
mITDtest, mIVStest, mILDtest, mPowTest1, mPowTest2, fc, fsNew);
binQ = ceil(binQ);
function [binQ, ILDdiff, ITDdiff, IVSdiff] = BAMQ4Public_restruct(mRef, mTest, fs)
%BAMQ calculate perceptual binaural difference between mRef and mTest
% -------------------------------------------------------------------------
%
% Usage: [binQ, binQild, binQitd, binQivs, binQitdivs, ILDdiff, ITDdiff,
% IVSdiff, cRefOutputs] = BAMQ4Public(mRef, mTest, fs, cRefOutputs)
%
% Input: mRef ... reference signal matrix (Nx2)
% mTest ... aligned test signal matrix (Nx2)
% fs ... sampling frequency (files are resampled to 44.1 kHz)
%
% 0 dB FS should correspond to 115 dB SPL
%
% Output: binQ ... binaural perceptual quality value
% ILDdiff ... intermediate ILD measure
% ITDdiff ... intermediate ITD measure (can be 0 if ITDs are not evaluable)
% IVSdiff ... intermediate IVS measure
%
% Values: 100 ... no difference
% 0 ... obvious difference
% -X ... even bigger difference
%
%
% Copyright (C) University of Oldenburg and HrTech 2012-2015
% Author : jan-hendrik.flessner@jade-hs.de
% Date : 12-02-2019
if nargin < 3,
error('Not enough input arguments.');
end
if size(mRef,2) ~= 2 || size(mTest,2) ~= 2
error('input signals need to be Nx2')
end
if size(mRef,1) ~= size(mTest,1)
error('input signals need to be same length (and time-aligned)')
end
if fs ~= 44100
mRef = resample(mRef,44100,fs);
mTest = resample(mTest,44100,fs);
fs = 44100;
end
if size(mRef,1) < fs*0.4,
error('signals need to be at least 0.4 s long')
end
% avoid NaNs in IVS calculation:
mRef = [0.00001,0.00001;mRef(1:end-1,:)];
mTest = [0.00001,0.00001;mTest(1:end-1,:)];
%% FRONT END: modified Dietz model
[mITDref, mIVSref, mILDref, mPowRef1, mPowRef2] = BAMQFrontEnd(mRef,fs);
[mITDtest, mIVStest, mILDtest, mPowTest1, mPowTest2, fc, fsNew] = BAMQFrontEnd(mTest,fs);
%% BACK END
[binQ, ILDdiff, ITDdiff, IVSdiff] = BAMQBackEnd(mITDref, mIVSref, mILDref, mPowRef1, mPowRef2, ...
mITDtest, mIVStest, mILDtest, mPowTest1, mPowTest2, fc, fsNew);
binQ = ceil(binQ);
end
function [mITD, mIVS, mILD, mPow1, mPow2, fc, fsNew] = BAMQFrontEnd(insig,fs)
%% Initialization
fsNew = 6000;
% Preprocessing parameters
vMiddleEarThr = [500 2000]; % Bandpass freqencies for middle ear transfer
middleEarOrder = 1;
compressionPower = 0.4;
haircellLowOrder = 5;
haircellLowCutoffHz = 770;
% Gammatone filterbank parameters
fLow = 50;
fHigh = 14000;
fBase = 700;
filtersPerERB = 1;
gammaOrder = 4;
bandwidthFactor = 1;
% modulation filter parameters
fModLow1 = 500;
fModLow2 = 600;
orderModLow1 = 2;
orderModLow2 = 10;
modGammaCenterFreqHz = 300;
modGammaBandwidthHz = 300;
modGammaAttenuationDB = 3;
modGammaOrder = 1;
% temporal resolution
tauCycles = 5; % in cycles
%% middle ear band pass filtering
[z,p,k] = butter(middleEarOrder,vMiddleEarThr/(fs/2));
[sos,g] = zp2sos(z,p,k);
h=dfilt.df2sos(sos,g);
mSignalME = filter(h,insig);
%% Filterbank analysis
% create filterbank
analyzer = gfb_analyzer_new(fs, fLow, fBase, fHigh,...
filtersPerERB,gammaOrder,bandwidthFactor);
fc = analyzer.center_frequencies_hz;
analyzer2 = analyzer;
idxHighestFine = find(fc < 1400,1,'last');
% apply filterbank
[mSignalIE, analyzer] = gfb_analyzer_process(analyzer, mSignalME(:,1));
[mSignalIE2, analyzer2] = gfb_analyzer_process(analyzer2, mSignalME(:,2));
mSignalIE = mSignalIE.';
mSignalIE2 = mSignalIE2.';
clear vSignalME
%% haircell processing
% half-wave rectification
mSignalRect = max(real(mSignalIE),0);
mSignalRect2 = max(real(mSignalIE2),0);
clear mSignalIE mSignalIE2
% compression
mSignalHC = mSignalRect.^compressionPower;
mSignalHC2 = mSignalRect2.^compressionPower;
clear mSignalRect mSignalRect2
% audio signal power calculation
mSignalHC1re = resample(mSignalHC,fsNew,fs);
mSignalHC2re = resample(mSignalHC2,fsNew,fs);
mPow1 = abs(mSignalHC1re.^(0.4^-1)).^2;
mPow2 = abs(mSignalHC2re.^(0.4^-1)).^2;
% lowpass
[z,p,k] = butter(haircellLowOrder,haircellLowCutoffHz/(fs/2));
[sos,g] = zp2sos(z,p,k);
h=dfilt.df2sos(sos,g);
mSignalHC = filter(h,mSignalHC);
mSignalHC2 = filter(h,mSignalHC2);
%% finestructure filter
% processing the haircell output with a fine structure filter
[mSignalFine, mSignalFine2] =...
gfb_envelope_filter(mSignalHC(:,1:idxHighestFine), mSignalHC2(:,1:idxHighestFine), fs, fc(1:idxHighestFine),...
bandwidthFactor,0,gammaOrder);
% modulation filter part 1, derivation to remove DC
mSignalModNoDC = filter([1 -1],1,mSignalHC(:,idxHighestFine+1:end));
mSignalModNoDC2 = filter([1 -1],1,mSignalHC2(:,idxHighestFine+1:end));
clear mSignalHC mSignalHC2
% calculation of interaural functions from haircell fine structure
stInterauralParamFine = dietzInterauralFunctions4binQ(...
mSignalFine, mSignalFine2, fc(1:idxHighestFine), tauCycles, fs, fsNew);
%% modulation filter part2
% mod filter lowpass 1 and 2
[z,p,k] = butter(orderModLow1,fModLow1/(fs/2));
[sos,g] = zp2sos(z,p,k);
h=dfilt.df2sos(sos,g);
mSignalModLow1 = filter(h,mSignalModNoDC);
mSignalModLow12 = filter(h,mSignalModNoDC2);
clear mSignalModNoDC mSignalModNoDC2
[z,p,k] = butter(orderModLow2,fModLow2/(fs/2));
[sos,g] = zp2sos(z,p,k);
h=dfilt.df2sos(sos,g);
mSignalModLow2 = filter(h,mSignalModLow1);
mSignalModLow22 = filter(h,mSignalModLow12);
clear mSignalModLow1 mSignalModLow12
[mSignalMod, mSignalMod2] =...
gfb_envelope_filter(mSignalModLow2, mSignalModLow22, fs,...
modGammaCenterFreqHz, modGammaBandwidthHz, ...
modGammaAttenuationDB, modGammaOrder);
clear mSignalModLow2 mSignalModLow22
% calculation of interaural functions from haircell modulation
stInterauralParamMod = dietzInterauralFunctions4binQ(...
mSignalMod, mSignalMod2, modGammaCenterFreqHz + zeros(1,length(mSignalMod(1,:))),...
tauCycles, fs, fsNew);
clear mSignalMod mSignalMod2
%% combine fine and mod
mIPD = [stInterauralParamFine.ipd_lp stInterauralParamMod.ipd_lp];
mIVS = [stInterauralParamFine.ic stInterauralParamMod.ic];
mFinst = [stInterauralParamFine.f_inst stInterauralParamMod.f_inst];
%% IPD2ITD
mITD = 1/(2*pi)*mIPD./mFinst;
%% ILD
[z,p,k] = butter(2,30/(fsNew/2));
[sos,g] = zp2sos(z,p,k);
h=dfilt.df2sos(sos,g);
mSignalLP = filter(h,mSignalHC1re);
mSignalLP2 = filter(h,mSignalHC2re);
mILD = 20*log10(max(mSignalLP2,1e-4)./max(mSignalLP,1e-4))/0.4;
end
function analyzer = gfb_analyzer_new(fs,flow,basef,fhigh,filters_per_ERBaud,gamma_order,bandwidth_factor)
if (nargin < 6)
% The order of the gammatone filter is derived from the global constant
% GFB_PREFERED_GAMMA_ORDER defined in "gfb_set_constants.m". Usually,
% this is equal to 4.
global GFB_PREFERED_GAMMA_ORDER;
gfb_set_constants;
gamma_order = GFB_PREFERED_GAMMA_ORDER;
end
if (nargin < 7)
bandwidth_factor = 1.0;
end
% To avoid storing information in global variables, we use Matlab
% structures:
analyzer.type = 'gfb_analyzer';
analyzer.fs = fs;
analyzer.flow = flow;
analyzer.basef = basef;
analyzer.fhigh = fhigh;
analyzer.filters_per_ERBaud = filters_per_ERBaud;
analyzer.bandwidth_factor = bandwidth_factor;
analyzer.fast = 0;
%
% analyzer.center_frequencies_hz = ...
% erbspacebw(flow,fhigh,1/filters_per_ERBaud,basef);
analyzer.center_frequencies_hz = ...
audspacebw(flow,fhigh,1/filters_per_ERBaud,basef);
% This loop actually creates the gammatone filters:
for band = [1:length(analyzer.center_frequencies_hz)]
center_frequency_hz = analyzer.center_frequencies_hz(band);
% Construct gammatone filter with one ERBaud bandwidth:
analyzer.filters(1,band) = ...
gfb_filter_new(fs, center_frequency_hz, ...
gamma_order, bandwidth_factor);
end
end
function [output, analyzer] = gfb_analyzer_process(analyzer, input)
if (analyzer.fast)
% use matlab extension for fast computation.
[output, analyzer] = gfb_analyzer_fprocess(analyzer, input);
else
number_of_bands = length(analyzer.center_frequencies_hz);
output = zeros(number_of_bands, length(input));
for band = [1:number_of_bands]
[output(band,:), analyzer.filters(band)] = ...
gfb_filter_process(analyzer.filters(band), ...
input );
end
end
end
function [envelopes_filtered, envelopes_sh_filtered] = gfb_envelope_filter(s1, s2, sampling_rate_hz, center_frequency_hz,...
bandwidth_hz, attenuation_db, gamma_filter_order)
flag = 0;
s1 = s1.';
s2 = s2.';
M = size(s1,1);
if length(center_frequency_hz) == 1
center_frequency_hz = center_frequency_hz * ones(1,M);
end
if isempty(bandwidth_hz) % default: width = 1 ERB
% recip_width1erb = diff(gfb_hz2erbscale(1:N/2));
% bandwidth_hz = round(1./recip_width1erb(round(center_frequency_hz)));
flag = 1;
elseif length(bandwidth_hz) == 1
bandwidth_hz = bandwidth_hz * ones(1,M);
end
for i = 1:M
if flag,
filter = gfb_filter_new(sampling_rate_hz, center_frequency_hz(i),...
gamma_filter_order);
else
if attenuation_db == 0,
filter = gfb_filter_new(sampling_rate_hz, center_frequency_hz(i),...
gamma_filter_order, bandwidth_hz(i));
else
filter = gfb_filter_new(sampling_rate_hz, center_frequency_hz(i),...
bandwidth_hz(i), attenuation_db, gamma_filter_order);
end
end
[envelopes_filtered(:,i), filter_obj] = ...
gfb_filter_process(filter, s1(i,:));
[envelopes_sh_filtered(:,i), filter_obj] = ...
gfb_filter_process(filter, s2(i,:));
end
end
function filter = gfb_filter_new(arg1,arg2,arg3,arg4,arg5)
global GFB_L;
global GFB_Q;
filter.type = 'gfb_Filter';
if (nargin == 2)
filter.coefficient = arg1;
filter.gamma_order = arg2;
elseif (nargin == 3) || (nargin == 4)
sampling_rate_hz = arg1;
center_frequency_hz = arg2;
filter.gamma_order = arg3;
bandwidth_factor = 1.0;
if (nargin == 4)
bandwidth_factor = arg4;
end
gfb_set_constants;
% equation (13) [Hohmann 2002]:
audiological_erb = (GFB_L + center_frequency_hz / GFB_Q) * bandwidth_factor;
% equation (14), line 3 [Hohmann 2002]:
a_gamma = (pi * factorial(2*filter.gamma_order - 2) * ...
2 ^ -(2*filter.gamma_order - 2) / ...
factorial(filter.gamma_order - 1) ^ 2);
% equation (14), line 2 [Hohmann 2002]:
b = audiological_erb / a_gamma;
% equation (14), line 1 [Hohmann 2002]:
lambda = exp(-2 * pi * b / sampling_rate_hz);
% equation (10) [Hohmann 2002]:
beta = 2 * pi * center_frequency_hz / sampling_rate_hz;
% equation (1), line 2 [Hohmann 2002]:
filter.coefficient = lambda * exp(1i * beta);
elseif (nargin == 5)
sampling_rate_hz = arg1;
center_frequency_hz = arg2;
bandwidth_hz = arg3;
attenuation_db = arg4;
filter.gamma_order = arg5;
% equation (12), line 4 [Hohmann 2002]:
phi = pi * bandwidth_hz / sampling_rate_hz;
% equation (12), line 3 [Hohmann 2002]:
u = -attenuation_db/filter.gamma_order;
% equation (12), line 2 [Hohmann 2002]:
p = (-2 + 2 * 10^(u/10) * cos(phi)) / (1 - 10^(u/10));
% equation (12), line 1 [Hohmann 2002]:
lambda = -p/2 - sqrt(p*p/4 - 1);
% equation (10) [Hohmann 2002]:
beta = 2 * pi * center_frequency_hz / sampling_rate_hz;
% equation (1), line 2 [Hohmann 2002]:
filter.coefficient = lambda * exp(1i*beta);
else
error ('gfb_filter_new needs either 2, 3, 4 or 5 arguments');
end
% normalization factor from section 2.2 (text) [Hohmann 2002]:
filter.normalization_factor = ...
2 * (1 - abs(filter.coefficient)) ^ filter.gamma_order;
filter.state = zeros(1, filter.gamma_order);
end
function [output, filter_obj] = gfb_filter_process(filter_obj, input)
factor = filter_obj.normalization_factor;
% for compatibility of the filter state with the MEX extension, we
% have to multiply the filter state with the filter coefficient before the
% call to filter:
filter_state = filter_obj.state * filter_obj.coefficient;
for i = [1:filter_obj.gamma_order]
[input, filter_state(i)] = ...
filter(factor, [1, -filter_obj.coefficient], ...
input, filter_state(i));
factor = 1;
end
output = input;
% for compatibility of the filter state with the MEX extension, we
% have to divide the filter state by the filter coefficient after the
% call to filter:
filter_obj.state = filter_state / filter_obj.coefficient;
end
function gfb_set_constants
global GFB_L GFB_Q GFB_PREFERED_GAMMA_ORDER GFB_GAINCALC_ITERATIONS;
GFB_L = 24.7; % see equation (17) in [Hohmann 2002]
GFB_Q = 9.265; % see equation (17) in [Hohmann 2002]
% We will use 4th order gammatone filters:
GFB_PREFERED_GAMMA_ORDER = 4;
% The gain factors are approximated in iterations. This is the default
% number of iterations:
GFB_GAINCALC_ITERATIONS = 100;
end
function [y,n] = audspacebw(flow,fhigh,varargin)
% ------ Checking of input parameters ---------
if nargin<2
error('%s: Too few input parameters.',upper(mfilename));
end;
if ~isnumeric(flow) || ~isscalar(flow) || flow<0
error('%s: flow must be a non-negative scalar.',upper(mfilename));
end;
if ~isnumeric(fhigh) || ~isscalar(fhigh) || fhigh<0
error('%s: fhigh must be a non-negative scalar.',upper(mfilename));
end;
if flow>fhigh
error('%s: flow must be less than or equal to fhigh.',upper(mfilename));
end;
% import = {'freqtoaud'};
hitme = varargin{2};
bw = varargin{1};
if ~isnumeric(bw) || ~isscalar(bw) || bw<=0
error('%s: bw must be a positive scalar.',upper(mfilename));
end;
%% ------ Computation --------------------------
if isempty(hitme)
% Convert the frequency limits to auds.
% audlimits = freqtoaud([flow,fhigh],flags.audscale);
audlimits = 9.2645*sign([flow,fhigh]).*log(1+abs([flow,fhigh])*0.00437);
audrange = audlimits(2)-audlimits(1);
% Calculate number of points, excluding final point
n = floor(audrange/bw);
% The remainder is calculated in order to center the points
% correctly between flow and fhigh.
remainder = audrange-n*bw;
audpoints = audlimits(1)+(0:n)*bw+remainder/2;
% Add the final point
n=n+1;
else
% Convert the frequency limits to auds.
% audlimits = freqtoaud([flow,fhigh,hitme],flags.audscale);
audlimits = 9.2645*sign([flow,fhigh,hitme]).*log(1+abs([flow,fhigh,hitme])*0.00437);
audrangelow = audlimits(3)-audlimits(1);
audrangehigh = audlimits(2)-audlimits(3);
% Calculate number of points, exluding final point.
nlow = floor(audrangelow/bw);
nhigh = floor(audrangehigh/bw);
audpoints=(-nlow:nhigh)*bw+audlimits(3);
n=nlow+nhigh+1;
end;
% y = audtofreq(audpoints,flags.audscale);
y = (1/0.00437)*sign(audpoints).*(exp(abs(audpoints)/9.2645)-1);
end
function aud = freqtoaud(freq,varargin)
%% ------ Checking of input parameters ---------
if nargin<1
error('%s: Too few input parameters.',upper(mfilename));
end;
if ~isnumeric(freq)
error('%s: freq must be number.',upper(mfilename));
end;
% definput.import={'freqtoaud'};
% [flags,kv]=ltfatarghelper({},definput,varargin);
%% ------ Computation --------------------------
if flags.do_mel
aud = 1000/log(17/7)*sign(freq).*log(1+abs(freq)/700);
end;
if flags.do_mel1000
aud = 1000/log(2)*sign(freq).*log(1+abs(freq)/1000);
end;
if flags.do_erb
% There is a round-off error in the Glasberg & Moore paper, as
% 1000/(24.7*4.37)*log(10) = 21.332 and not 21.4 as they state.
% The error is tiny, but may be confusing.
% On page 37 of the paper, there is Fortran code with yet another set
% of constants:
% 2302.6/(24.673*4.368)*log10(1+freq*0.004368);
aud = 9.2645*sign(freq).*log(1+abs(freq)*0.00437);
end;
if flags.do_bark
% The bark scale seems to have several different approximations available.
% This one was found through http://www.ling.su.se/STAFF/hartmut/bark.htm
aud = sign(freq).*((26.81./(1+1960./abs(freq)))-0.53);
% The one below was found on Wikipedia.
%aud = 13*atan(0.00076*freq)+3.5*atan((freq/7500).^2);
end;
if flags.do_erb83
aud = sign(freq).*(11.17*log((abs(freq)+312)./(abs(freq)+14675))+43.0);
end;
if flags.do_freq
aud = freq;
end;
end
function outp = dietzInterauralFunctions4binQ(s1, s2, fc, coh_cycles, fs, fsNew)
s1 = resample(s1,fsNew,fs);
s2 = resample(s2,fsNew,fs);