在一个循环中对音频块进行多线程处理(Python)

Question 1

我有一个很大的音频文件，想得到它的转录文本。为此，我选择了基于静音的转换，即根据句子之间的静音把音频文件分割成若干块。然而，即使是一个很短的音频文件，处理时间也比预期长得多。

from pydub import AudioSegment
from pydub.silence import split_on_silence
# Load the recording that should be transcribed.
voice = AudioSegment.from_wav(path)  # path to audio file
# Split the recording wherever split_on_silence detects a pause; the
# threshold is set 14 below the recording's own dBFS level.
chunks = split_on_silence(
    voice,
    min_silence_len=500,
    silence_thresh=voice.dBFS - 14,
    keep_silence=500,
)
为了更快地处理这些音频块，我尝试在循环中使用多线程，如下所示：
n_threads = len(chunks)
thread_list = []
# Create one thread per chunk and start it right away.
# NOTE(review): each thread receives a single chunk, while threaded_process
# loops over its argument -- confirm this is the intended call.
for chunk in chunks:
    worker = Thread(target=threaded_process, args=(chunk,))
    thread_list.append(worker)
    worker.start()
# Wait until every thread has finished.
for worker in thread_list:
    worker.join()
函数'threaded_process'应该执行语音到文本的转换。
def threaded_process(chunks):
    """Transcribe each chunk and write the text to ``recognized.txt``.

    Every chunk is padded with a silent segment (``duration=10``) on both
    sides, exported to a temporary ``chunk<i>.wav`` file and passed to
    ``audio_to_text``. The temporary file is removed afterwards, also when
    the transcription fails.

    Args:
        chunks: Iterable of audio segments to transcribe.

    Returns:
        ``"Error5487E"`` as soon as ``audio_to_text`` reports ``"Error5487"``;
        otherwise ``None``.
    """
    # NOTE(review): the output file is opened in "w+" mode and the temporary
    # files are always named chunk0.wav, chunk1.wav, ...; concurrent calls
    # (e.g. one per thread) would therefore work on the same paths. Confirm
    # how callers invoke this function.
    chunk_silent = AudioSegment.silent(duration=10)
    # The context manager closes the output file on the early error return too.
    with open("recognized.txt", "w+") as fh:
        for i, chunk in enumerate(chunks):
            audio_chunk = chunk_silent + chunk + chunk_silent
            file = 'chunk' + str(i) + '.wav'
            print("saving chunk{0}.wav".format(i))
            audio_chunk.export("./chunk{0}.wav".format(i), bitrate='192k', format="wav")
            try:
                print("Processing chunk " + str(i))
                # Another function which actually does the speech to text
                # conversion (IBM Watson SpeechToText API).
                rec = audio_to_text(file)
                if rec == "Error5487":
                    return "Error5487E"
                fh.write(rec + " ")
            finally:
                # Do not leave the temporary wav behind, whatever happened.
                os.remove(file)
但转换仍然是按原来的（顺序）方式完成的，并没有真正用上多线程。
我还收到这样的消息--
[WinError 32] 进程无法访问该文件，因为它正被另一个进程使用。'chunk0.wav'
为什么会出现这种情况？

Question 2


          
           
            
在这种情况下，多线程的速度更快，因为音频转录是在云端完成的。

使用的库：

- pydub（音频处理包）
- speech_recognition（用于音频转文字的 Google 语音识别 API）
            
            import concurrent.futures      # thread execution manager
import os
from time import time
import wget                    # save url data to file
from pydub import AudioSegment # process speech
from pydub.playback import play
from pydub.silence import split_on_silence
import speech_recognition as sr # speech recognizer
#########################################################
# Related to Data Acquisition
#########################################################
def get_sound_file(url):
    """Download the data at ``url`` and return the name of the local file."""
    return wget.download(url)
def get_nonexistant_path(fname_path):
    """Return the next unused file name based upon ``fname_path``.

    If ``fname_path`` does not exist it is returned unchanged. Otherwise a
    counter is inserted before the extension (``name-1.ext``, ``name-2.ext``,
    ...) and the first candidate that does not exist yet is returned.

    Examples
    --------
    Assuming ``/etc/issue`` exists and the other paths do not:

    >>> get_nonexistant_path('/etc/issue')  # doctest: +SKIP
    '/etc/issue-1'
    >>> get_nonexistant_path('whatever/1337bla.py')  # doctest: +SKIP
    'whatever/1337bla.py'

    Source: https://stackoverflow.com/questions/17984809/how-do-i-create-a-incrementing-filename-in-python
    """
    if not os.path.exists(fname_path):
        return fname_path
    filename, file_extension = os.path.splitext(fname_path)
    i = 1
    new_fname = "{}-{}{}".format(filename, i, file_extension)
    # Increase the counter until a name is found that is not taken yet.
    while os.path.exists(new_fname):
        i += 1
        new_fname = "{}-{}{}".format(filename, i, file_extension)
    return new_fname
def create_files(source_file):
    """Split a WAV file on silence and store every segment as its own file.

    Returns the list of file names that were created, in segment order.
    """
    sound = AudioSegment.from_wav(source_file)
    # Break into segments based upon silence
    segments = split_on_silence(sound, silence_thresh=sound.dBFS - 14)

    # Export with a bitrate derived from the input file, see
    # https://stackoverflow.com/questions/33747728/how-can-i-get-the-same-bitrate-of-input-and-output-file-in-pydub
    # https://wiki.audacityteam.org/wiki/WAV
    # NOTE(review): confirm whether frame_width already includes the channel
    # count; if so the extra "* sound.channels" overstates the bitrate.
    bits_per_second = sound.frame_rate * sound.frame_width * 8 * sound.channels
    original_bitrate = str(bits_per_second / 1000)

    file_list = []
    for audio_chunk in segments:
        # Next name whose enumeration number has not been used yet,
        # i.e. file-1.wav, file-2.wav, ...
        target = get_nonexistant_path(source_file)
        file_list.append(target)
        audio_chunk.export(target, format="wav", bitrate=original_bitrate)
    return file_list  # list of files created
#########################################################
# Speech to text
#########################################################
def audio_to_text(filename):
    """Convert the speech in an audio file to text.

    Based upon blog: https://www.geeksforgeeks.org/audio-processing-using-pydub-and-google-speechrecognition-api/

    Args:
        filename: Path of the audio file to transcribe.

    Returns:
        The text returned by ``recognize_google``, or ``None`` when the audio
        could not be understood or the request failed.
    """
    # Get recognizer
    r = sr.Recognizer()
    with sr.AudioFile(filename) as source:
        audio_listened = r.listen(source)
        # Try to recognize the listened audio and catch exceptions.
        try:
            return r.recognize_google(audio_listened)
        except sr.UnknownValueError:
            # Google could not understand the audio.
            print("Could not understand audio")
            return None
        except sr.RequestError:
            # The results cannot be requested from Google.
            # Probably an internet connection error.
            print("Could not request results.")
            return None
def process(file):
    """Convert one audio file to text and write it to ``result.txt``.

    Nothing is written when ``audio_to_text`` returns no transcription;
    the output file is still created (and truncated) in that case.
    """
    with open('result.txt', 'w') as fout:
        transcription = audio_to_text(file)
        if transcription:
            fout.write(transcription + '\n')
def process_single(files):
    """Convert multiple audio files into one text file, one after another.

    Each non-empty transcription is written as its own line to
    ``result-single.txt``; files without a transcription are skipped.
    """
    with open('result-single.txt', 'w') as fout:
        for file in files:
            transcription = audio_to_text(file)
            if transcription:
                fout.write(transcription + '\n')
def process_threads(files):
    """Convert multiple audio files into one text file using multiple threads.

    Each non-empty transcription is written as its own line to
    ``result_thread.txt``; files without a transcription are skipped.
    """
    with open('result_thread.txt', 'w') as fout:
        # max_workers=None lets ThreadPoolExecutor pick its default number of
        # worker threads (the default depends on the Python version).
        with concurrent.futures.ThreadPoolExecutor(max_workers=None) as executor:
            # executor.map yields the results in the order of `files`, so the
            # output lines keep the order of the audio chunks.
            for transcription in executor.map(audio_to_text, files):
                if transcription:
                    fout.write(transcription + '\n')
Test Code
if __name__ == "__main__":
    # url of data used for testing
    url = 'http://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0010_8k.wav'
    # download data to local file
    data_file_name = get_sound_file(url)
    # place data into chunks based upon silence
    chunk_file_names = create_files(data_file_name)

    # The three variants that are timed, in the order they are run:
    #   1. the whole file without partitioning into chunks
    #   2. the chunks, single threaded
    #   3. the chunks, multiple threads
    runs = [
        (process, data_file_name,
         'Running entire audio file elapsed time'),
        (process_single, chunk_file_names,
         'Running chunked audio files elapsed time'),
        (process_threads, chunk_file_names,
         'Running chunked audio files using multiple threads elapsed time'),
    ]
    for run, argument, label in runs:
        t0 = time()
        run(argument)
        print(f'{label}: {time() - t0:.4f}')