不引用第三方库读取wav文件并逐帧估算基频
借助大模型实现:请用C语言实现,读取wav文件的数据,并估算每一帧的基频,输出每帧的基频值,最后求平均值,不要有省略的功能代码,不引用第三方专用库,不考虑运行效率,但是要求准确。
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
// WAV file header structure.
// Field-by-field image of the canonical 44-byte PCM WAV header: the RIFF
// descriptor, the "fmt " chunk, and the header of the "data" chunk.
// All members are naturally aligned, so the struct has no padding on common
// ABIs (4+4+4+4+4+2+2+4+4+2+2+4+4 = 44 bytes).
// NOTE(review): this layout only matches files whose "data" chunk directly
// follows a 16-byte "fmt " chunk; per the RIFF format the multi-byte fields
// are stored little-endian in the file.
typedef struct {
char riff[4]; // chunk tag, expected to hold "RIFF"
uint32_t overall_size; // RIFF chunk size field (presumably file size minus 8)
char wave[4]; // form type, expected to hold "WAVE"
char fmt_chunk_marker[4]; // sub-chunk tag, expected to hold "fmt "
uint32_t length_of_fmt; // size of the fmt payload (16 for plain PCM)
uint16_t format_type; // audio format code; 1 means uncompressed PCM
uint16_t channels; // number of interleaved channels
uint32_t sample_rate; // sample frames per second (Hz)
uint32_t byte_rate; // presumably sample_rate * block_align
uint16_t block_align; // presumably bytes per sample frame (all channels)
uint16_t bits_per_sample; // bit depth of a single sample
char data_chunk_header[4]; // sub-chunk tag, expected to hold "data"
uint32_t data_size; // size of the sample data in bytes
} WAV_HEADER;
// Decode a little-endian 16-bit field from raw file bytes (host-endian independent).
static uint16_t wav_le16(const unsigned char *p) {
    return (uint16_t)((unsigned)p[0] | ((unsigned)p[1] << 8));
}

// Decode a little-endian 32-bit field from raw file bytes (host-endian independent).
static uint32_t wav_le32(const unsigned char *p) {
    return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
           ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

// Print a fatal WAV error, close the file and terminate the program.
static void wav_fail(FILE *file, const char *message) {
    fprintf(stderr, "%s\n", message);
    fclose(file);
    exit(EXIT_FAILURE);
}

// Consume `count` bytes from the stream; returns 0 on success, -1 on a short read.
// Reading (instead of fseek) avoids `long` overflow and detects truncated files.
static int wav_skip(FILE *file, uint64_t count) {
    unsigned char scratch[512];
    while (count > 0) {
        size_t want = count < sizeof scratch ? (size_t)count : sizeof scratch;
        if (fread(scratch, 1, want, file) != want) {
            return -1;
        }
        count -= want;
    }
    return 0;
}

// Read a 16-bit PCM WAV file and return its samples as a mono signal.
//
// The RIFF container is walked chunk by chunk, so files carrying extra chunks
// (e.g. "LIST") before "data" are handled. Multi-channel audio is down-mixed
// by averaging the channels of every sample frame, because the caller only
// receives a single sample stream.
//
// filename    - path of the WAV file to open.
// num_samples - out: number of mono samples returned (sample frames actually read).
// sample_rate - out: sampling rate in Hz taken from the "fmt " chunk.
//
// Returns a malloc'ed buffer the caller must free(). Any error (unreadable
// file, malformed header, non-PCM or non-16-bit data, out of memory) prints a
// message to stderr and terminates the process with EXIT_FAILURE.
int16_t* read_wav_file(const char* filename, uint32_t* num_samples, uint32_t* sample_rate) {
    FILE *file = fopen(filename, "rb");
    if (!file) {
        perror("Error opening file");
        exit(EXIT_FAILURE);
    }

    unsigned char riff[12];
    if (fread(riff, 1, sizeof riff, file) != sizeof riff ||
        memcmp(riff, "RIFF", 4) != 0 || memcmp(riff + 8, "WAVE", 4) != 0) {
        wav_fail(file, "Not a RIFF/WAVE file");
    }

    unsigned channels = 0;
    uint32_t rate = 0;
    uint32_t data_size = 0;
    int have_fmt = 0;

    // Walk the chunk list until the sample data is found.
    for (;;) {
        unsigned char chunk[8];
        if (fread(chunk, 1, sizeof chunk, file) != sizeof chunk) {
            wav_fail(file, have_fmt ? "No data chunk found" : "No fmt chunk found");
        }
        uint32_t chunk_size = wav_le32(chunk + 4);
        // Chunks are word aligned: an odd-sized payload is followed by one pad byte.
        uint64_t padded_size = (uint64_t)chunk_size + (chunk_size & 1u);

        if (memcmp(chunk, "fmt ", 4) == 0) {
            unsigned char fmt[16];
            if (chunk_size < sizeof fmt || fread(fmt, 1, sizeof fmt, file) != sizeof fmt) {
                wav_fail(file, "Truncated fmt chunk");
            }
            unsigned format_type = wav_le16(fmt);
            if (format_type != 1) {
                fprintf(stderr, "Unsupported format type: %u\n", format_type);
                fclose(file);
                exit(EXIT_FAILURE);
            }
            channels = wav_le16(fmt + 2);
            rate = wav_le32(fmt + 4);
            if (channels == 0 || wav_le16(fmt + 14) != 16) {
                wav_fail(file, "Only 16-bit PCM with at least one channel is supported");
            }
            if (wav_skip(file, padded_size - sizeof fmt) != 0) {
                wav_fail(file, "Truncated fmt chunk");
            }
            have_fmt = 1;
        } else if (memcmp(chunk, "data", 4) == 0) {
            if (!have_fmt) {
                wav_fail(file, "data chunk appears before fmt chunk");
            }
            data_size = chunk_size;
            break;
        } else if (wav_skip(file, padded_size) != 0) {
            wav_fail(file, have_fmt ? "No data chunk found" : "No fmt chunk found");
        }
    }

    size_t frame_bytes = (size_t)channels * 2;
    uint32_t total_frames = data_size / (uint32_t)frame_bytes;

    // Allocate at least one element so an empty data chunk still yields a valid pointer.
    int16_t *data = malloc((total_frames ? (size_t)total_frames : 1) * sizeof *data);
    unsigned char *frame = malloc(frame_bytes);
    if (!data || !frame) {
        free(data);
        free(frame);
        wav_fail(file, "Out of memory while reading samples");
    }

    // Read frame by frame; stop early if the file is shorter than the header claims.
    uint32_t frames_read = 0;
    while (frames_read < total_frames && fread(frame, frame_bytes, 1, file) == 1) {
        int64_t mix = 0;
        for (unsigned ch = 0; ch < channels; ch++) {
            int32_t value = wav_le16(frame + 2 * (size_t)ch);
            if (value >= 0x8000) {
                value -= 0x10000; // reinterpret as two's-complement signed 16-bit
            }
            mix += value;
        }
        data[frames_read++] = (int16_t)(mix / (int64_t)channels);
    }
    if (frames_read < total_frames) {
        fprintf(stderr, "Warning: data chunk is truncated, using %lu of %lu frames\n",
                (unsigned long)frames_read, (unsigned long)total_frames);
    }

    free(frame);
    fclose(file);
    *sample_rate = rate;
    *num_samples = frames_read;
    return data;
}
// Raw (unnormalised) autocorrelation of `signal` at offset `lag`:
// the sum of signal[i] * signal[i + lag] over every i with i + lag < length.
// Yields 0.0 when `lag` is at least `length`, since no sample pair overlaps.
double autocorrelation(int16_t* signal, int length, int lag) {
    double total = 0.0;
    int left = 0;     // index of the earlier sample of the pair
    int right = lag;  // index of the later sample of the pair

    while (right < length) {
        // The product of two 16-bit samples is evaluated in int before accumulation.
        total += signal[left] * signal[right];
        left++;
        right++;
    }
    return total;
}
// Estimate the fundamental frequency (in Hz) of one frame by locating the lag
// with the largest raw autocorrelation inside the 50-500 Hz pitch range.
//
// frame       - pointer to `frame_size` consecutive samples.
// frame_size  - number of samples in the frame.
// sample_rate - sampling rate of the signal in Hz.
//
// Returns sample_rate / best_lag, or 0.0 when no pitch can be reported:
// invalid arguments, an empty search range, or a frame with no positive
// autocorrelation peak (e.g. digital silence).
double estimate_fundamental_frequency(int16_t* frame, int frame_size, uint32_t sample_rate) {
    if (frame == NULL || frame_size < 2 || sample_rate == 0) {
        return 0.0;
    }

    // A short lag corresponds to a high pitch and a long lag to a low pitch.
    uint32_t min_lag = sample_rate / 500; // highest pitch searched: 500 Hz
    uint32_t max_lag = sample_rate / 50;  // lowest pitch searched: 50 Hz
    if (min_lag < 1) {
        min_lag = 1; // lag 0 is the frame energy, not a period, and would divide by zero
    }
    if (max_lag > (uint32_t)(frame_size - 1)) {
        max_lag = (uint32_t)(frame_size - 1); // lags beyond the frame have no overlapping samples
    }

    double max_autocorr = 0.0; // only a strictly positive peak counts as periodicity
    uint32_t best_lag = 0;
    for (uint32_t lag = min_lag; lag <= max_lag; lag++) {
        double autocorr = autocorrelation(frame, frame_size, (int)lag);
        if (autocorr > max_autocorr) {
            max_autocorr = autocorr;
            best_lag = lag;
        }
    }

    if (best_lag == 0) {
        return 0.0; // no usable peak found
    }
    return (double)sample_rate / best_lag;
}
// Program entry point: load a WAV file, print the estimated fundamental
// frequency of every analysis frame, then print the average over the frames
// that produced a usable estimate.
//
// argv[1] (optional) - path of the WAV file; defaults to "wdsyy1.wav".
//
// Returns EXIT_SUCCESS on success, EXIT_FAILURE when the samples could not be
// loaded, the file is shorter than one frame, or no frame yields a pitch.
int main(int argc, char* argv[]) {
    enum { FRAME_SIZE = 1024, HOP_SIZE = 512 }; // frame length and frame shift, in samples

    const char *wav_path = (argc > 1) ? argv[1] : "wdsyy1.wav";

    uint32_t num_samples = 0;
    uint32_t sample_rate = 0;
    int16_t *data = read_wav_file(wav_path, &num_samples, &sample_rate);
    if (data == NULL) {
        fprintf(stderr, "Failed to load samples from %s\n", wav_path);
        return EXIT_FAILURE;
    }

    // Guard the unsigned subtraction below: a file shorter than one frame
    // would otherwise wrap around to a huge frame count.
    if (num_samples < FRAME_SIZE) {
        fprintf(stderr, "Not enough samples for one frame: need %d, got %lu\n",
                (int)FRAME_SIZE, (unsigned long)num_samples);
        free(data);
        return EXIT_FAILURE;
    }
    int num_frames = (int)((num_samples - FRAME_SIZE) / HOP_SIZE + 1);

    double sum_fundamentals = 0.0;
    int counted_frames = 0;
    for (int i = 0; i < num_frames; i++) {
        double fundamental_frequency =
            estimate_fundamental_frequency(data + (size_t)i * HOP_SIZE, FRAME_SIZE, sample_rate);
        printf("Frame %d: Fundamental Frequency = %.2f Hz\n", i, fundamental_frequency);
        // Frames without a finite, positive estimate must not distort the average.
        if (isfinite(fundamental_frequency) && fundamental_frequency > 0.0) {
            sum_fundamentals += fundamental_frequency;
            counted_frames++;
        }
    }
    free(data);

    if (counted_frames == 0) {
        fprintf(stderr, "No frame produced a usable pitch estimate\n");
        return EXIT_FAILURE;
    }
    printf("Average Fundamental Frequency = %.2f Hz\n", sum_fundamentals / counted_frames);
    return EXIT_SUCCESS;
}
本代码运行结果:

MATLAB运行代码及结果:
% Frame parameters in samples: analysis window length and hop (frame shift).
wlen = 1024;
inc = 512;
%% Read the audio file
[x, fs] = audioread('wdsyy1.wav');
%% Down-mix to mono by averaging the channels (any multi-channel input)
if size(x, 2) > 1
    x = mean(x, 2);
end
%% Extract the pitch track
% pitch() expects the overlap between adjacent windows rather than the hop,
% so the overlap is derived from the window length and the hop.
f0 = pitch(x, fs, 'WindowLength', wlen, 'OverlapLength', wlen - inc);
fn1 = size(f0, 1);            % total number of analysed frames
result_f0 = f0(f0 <= 400);    % keep only estimates at or below 400 Hz
figure()
plot(result_f0);              % plot the retained pitch values
title('基音频率f0')
xlabel('帧')
ylabel('频率/Hz')
axis([0 fn1 0 400])           % x: frame index range, y: 0-400 Hz
凯特网版权声明:以上内容允许转载,但请注明出处,谢谢!
