目前正在做一个百度语音识别的小项目,不过有个问题一直困扰着我。我利用 HTML5 的 audio 录制音频,生成一个 Blob 上传到服务器后再进行识别,但是百度语音识别一直提示识别失败(错误码 3301),我猜想这可能和我生成的音频 Blob 有关系。跪求解决方案!!
// Normalize vendor-prefixed browser APIs onto their standard names.
// Every candidate is read as a property of its host object: a bare identifier
// such as `webkitAudioContext` throws a ReferenceError when it is not
// declared, which would abort the whole fallback chain.
navigator.getUserMedia = navigator.getUserMedia ||
    navigator.webkitGetUserMedia ||
    navigator.mozGetUserMedia ||
    navigator.msGetUserMedia;
window.AudioContext = window.AudioContext ||
    window.webkitAudioContext ||
    window.mozAudioContext ||
    window.msAudioContext;
window.URL = window.URL ||
    window.webkitURL ||
    window.mozURL ||
    window.msURL;
/**
 * Shorthand for `document.getElementById`.
 *
 * @param {string} sel - id of the element to look up (an id, not a CSS selector).
 * @returns {*} whatever `document.getElementById` returns for that id.
 */
function $(sel) {
  var found = document.getElementById(sel);
  return found;
}
// Shared recorder state (typographic quotes replaced with real string quotes).
var audioStream,                // MediaStream assigned by openMic once access is granted
    leftChl = [],               // recorded Float32Array chunks of channel 0
    rightChl = [],              // recorded Float32Array chunks of channel 1
    recLength = 0,              // running count of samples recorded per channel
    btnStart = $("start"),
    btnStop = $("stop");
// Route uncaught errors to the same logger used everywhere else.
window.onerror = Log;
/**
 * Write a single message to the browser console.
 * Only the first argument is logged; any further arguments are ignored.
 *
 * @param {*} msg - value to print.
 */
function Log(msg) {
  console.log(msg);
}
/**
 * Start recording from the microphone.
 *
 * Disables `this` and enables the stop button, requests audio access, then
 * wires microphone -> biquad filter -> script processor -> destination and
 * appends every processed buffer to `leftChl` / `rightChl`, advancing
 * `recLength` by the number of samples captured per channel.
 *
 * NOTE(review): presumably attached as the start button's click handler, so
 * `this` is that button — verify against the caller.
 */
function openMic() {
  this.disabled = true;
  btnStop.removeAttribute('disabled');
  Log("start");

  // Single formatter shared by the getUserMedia failure path and the try/catch.
  function logError(e) {
    Log(e.name + ":" + (e.message || "") + " " + (e.stack || ""));
  }

  function onStream(stream) {
    audioStream = stream;
    Log("proc stream");
    try {
      var ac = new AudioContext,
          recorder = ac.createScriptProcessor(2048, 2, 2),
          microphone = ac.createMediaStreamSource(stream),
          // NOTE(review): the filter is left at its default settings; per the
          // Web Audio spec that is a low-pass filter at 350 Hz, which may hurt
          // speech recognition quality — confirm it is wanted.
          filter = ac.createBiquadFilter();

      // Buffer processing event: copy each channel, because the underlying
      // buffers are reused between callbacks.
      recorder.onaudioprocess = function (e) {
        var ib = e.inputBuffer;
        leftChl.push(new Float32Array(ib.getChannelData(0)));
        rightChl.push(new Float32Array(ib.getChannelData(1)));
        // Count the samples actually delivered instead of assuming 2048.
        recLength += ib.length;
      };

      microphone.connect(filter);
      filter.connect(recorder);          // filter node -> buffer
      recorder.connect(ac.destination);  // buffer -> speakers
      Log("proc stream done.");
    } catch (e) {
      logError(e);
    }
  }

  // Prefer the promise-based API; fall back to the deprecated callback form.
  if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
    navigator.mediaDevices.getUserMedia({ audio: true }).then(onStream, logError);
  } else {
    navigator.getUserMedia({ audio: true }, onStream, logError);
  }
}
/**
 * Merge buffers: copy a list of chunks back-to-back into one Float32Array.
 *
 * @param {Array} buf - chunks to concatenate, in order.
 * @param {number} len - length of the returned array; positions not covered
 *     by a chunk stay 0.
 * @returns {Float32Array} the merged samples.
 */
function mergeBuffers(buf, len) {
  var merged = new Float32Array(len);
  var writePos = 0;
  for (var n = 0, count = buf.length; n < count; n++) {
    var chunk = buf[n];
    merged.set(chunk, writePos);
    writePos += chunk.length;
  }
  return merged;
}
/**
 * Interleave two channels into one array: left[0], right[0], left[1], ...
 *
 * @param {Float32Array} left - samples of the first channel.
 * @param {Float32Array} right - samples of the second channel.
 * @returns {Float32Array} array of length left.length + right.length.
 */
function interleave(left, right) {
  var total = left.length + right.length;
  var mixed = new Float32Array(total);
  // Each pass writes one frame (a left sample followed by a right sample).
  for (var out = 0; out < total; out += 2) {
    var frame = out / 2;
    mixed[out] = left[frame];
    mixed[out + 1] = right[frame];
  }
  return mixed;
}
/**
 * Write a string into a DataView, one byte per character, starting at `offset`.
 * Each character's code is stored with `setUint8`, so this is only meaningful
 * for single-byte characters such as the ASCII chunk tags of a WAV header.
 *
 * @param {DataView} view - destination view.
 * @param {number} offset - byte offset of the first character.
 * @param {string} string - text to write.
 */
function writeUTFBytes(view, offset, string) {
  var count = string.length;
  var pos = 0;
  while (pos < count) {
    view.setUint8(offset + pos, string.charCodeAt(pos));
    pos += 1;
  }
}
/**
 * Build a 16-bit PCM, 2-channel WAV file from the recorded channel chunks.
 *
 * @param {Float32Array[]} left - recorded chunks of the left channel.
 * @param {Float32Array[]} right - recorded chunks of the right channel.
 * @param {number} recLen - total number of samples recorded per channel.
 * @param {number} [sampleRate=44100] - sample rate written into the header;
 *     pass the recording AudioContext's `sampleRate` when it is not 44100.
 * @returns {Blob} the WAV file as a Blob of type 'audio/wav'.
 *
 * NOTE(review): the output is stereo; if the recognition service expects a
 * different channel count or sample rate the audio must be converted first —
 * confirm against the service's documentation.
 */
function getWAV(left, right, recLen, sampleRate) {
  var HEADER_BYTES = 44,
      CHANNELS = 2,
      BYTES_PER_SAMPLE = 2,
      rate = sampleRate || 44100,
      blockAlign = CHANNELS * BYTES_PER_SAMPLE;

  Log("proc left and right buffer.");
  // Flatten the left and right chunk lists, then interleave them.
  var result = interleave(mergeBuffers(left, recLen), mergeBuffers(right, recLen)),
      rLen = result.length,
      dataBytes = rLen * BYTES_PER_SAMPLE,
      // Buffer for the .wav file: header followed by the PCM data.
      buffer = new ArrayBuffer(HEADER_BYTES + dataBytes),
      view = new DataView(buffer);

  // RIFF chunk descriptor. The size field excludes the 8 bytes of the
  // 'RIFF' tag and the size field itself, i.e. 36 + data bytes.
  writeUTFBytes(view, 0, 'RIFF');
  view.setUint32(4, HEADER_BYTES - 8 + dataBytes, true);
  writeUTFBytes(view, 8, 'WAVE');

  // FMT sub-chunk
  writeUTFBytes(view, 12, 'fmt ');
  view.setUint32(16, 16, true);                 // sub-chunk size
  view.setUint16(20, 1, true);                  // audio format 1 = PCM
  view.setUint16(22, CHANNELS, true);           // stereo (2 channels)
  view.setUint32(24, rate, true);               // sample rate
  view.setUint32(28, rate * blockAlign, true);  // byte rate
  view.setUint16(32, blockAlign, true);         // block align
  view.setUint16(34, 8 * BYTES_PER_SAMPLE, true); // bits per sample

  // data sub-chunk
  writeUTFBytes(view, 36, 'data');
  view.setUint32(40, dataBytes, true);

  // write the PCM samples
  var idx = HEADER_BYTES,
      i = 0,
      sample;
  for (; i < rLen; ++i) {
    // Clamp to [-1, 1]: setInt16 wraps out-of-range values instead of
    // saturating, which would turn clipping into loud noise.
    sample = Math.max(-1, Math.min(1, result[i]));
    view.setInt16(idx, sample * 0x7FFF, true);
    idx += BYTES_PER_SAMPLE;
  }
  Log(view);
  Log("build done.");
  // The final Blob
  return new Blob([view], { type: 'audio/wav' });
}
// (Blob generation written by an expert found online; this is what I use for the recognition.)
语音识别似乎可以成功了:我将 result 传递到了后台,不过情况很不理想,数据量过大,导致几乎不能成功。应该对 result 进行处理,例如将其中的 0 值数据删除。
未经压缩处理的 PCM 数据过于庞大,几乎无法识别,或许该寻找一些压缩方法。