I am seeing many sockets, belonging to threads of the same process, stuck in CLOSE_WAIT, because of which I am getting a ‘Too many open files’ error.
OSError: [Errno 24] Too many open files:
This happens when multiple calls to the Google Cloud Speech API are made.
I have gone through various answers on Stack Overflow, but I am unable to figure out a solution.
Running `sudo lsof | grep -i close | wc -l` prints 15180.
The code I have shared is a trimmed version of the actual code. I am able to reproduce the error using the code below.
"""Minimal Tornado service that forwards an uploaded audio file to Google Cloud Speech.

POST a multipart form with an ``audio`` file field to
``/test_bug/client/googlestt2``; the response body is the recognized
transcript (or ``Empty Audio`` when nothing was recognized).
"""

import json
import os
import os.path
import string

import tornado.escape
import tornado.httpserver
import tornado.ioloop
import tornado.options
import tornado.web
from google.cloud import speech
from google.cloud.speech import enums, types

# Options must be defined BEFORE the command line is parsed; parsing first
# makes ``--port=...`` fail as an unrecognized option.
tornado.options.define("port", default=8888, help="run on the given port", type=int)
tornado.options.parse_command_line()

SPEECH_TO_TEXT_CREDENTIALS = 'my_json_file.json'
UPLOAD_FOLDER = '/home/ubuntu/uploads'


class Application(tornado.web.Application):
    """Tornado application that owns one shared Speech client for all requests."""

    def __init__(self):
        # The Google client library reads the credentials path from this
        # environment variable, so it must be set before the client is built.
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = SPEECH_TO_TEXT_CREDENTIALS
        # Created once here and reused by every handler via
        # ``self.application.speech_client``.
        self.speech_client = speech.SpeechClient()
        handlers = [
            (r"/test_bug/client/googlestt2", GoogleSTTHandler),
        ]
        tornado.web.Application.__init__(self, handlers)


class GoogleSTTHandler(tornado.web.RequestHandler):
    """Handle audio uploads and reply with the speech-to-text transcript."""

    def post(self):
        """Save the uploaded ``audio`` file and return its transcript.

        Replies with ``{'Error': 'No audio provided'}`` when the request
        carries no ``audio`` file field.
        """
        if 'audio' not in self.request.files:
            self.finish({'Error': "No audio provided"})
            # Stop here: without this return the handler fell through to the
            # lookup below and raised KeyError after the response was finished.
            return

        audio_filename = 'test.wav'
        content = self.request.files['audio'][0]['body']

        # Keep a copy of the upload on disk, as before. The bytes are already
        # in memory, so there is no need to read the file back afterwards.
        with open(os.path.join(UPLOAD_FOLDER, audio_filename), 'wb') as f:
            f.write(content)

        audio = types.RecognitionAudio(content=content)
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            language_code='en-IN',
        )

        # NOTE: this is a synchronous call, so the Tornado IOLoop is blocked
        # until the Speech API answers.
        response = self.application.speech_client.recognize(config, audio)

        if not response.results:
            transcript_upload = "Empty Audio"
        else:
            # Combine every result; the previous loop overwrote the variable
            # on each iteration and therefore returned only the last result.
            pieces = [
                result.alternatives[0].transcript.strip()
                for result in response.results
            ]
            transcript_upload = 'Transcript: {}'.format(' '.join(pieces))

        self.finish(transcript_upload)


def main():
    """Start the HTTP server on the configured port and run the IOLoop forever."""
    http_server = tornado.httpserver.HTTPServer(Application())
    http_server.listen(tornado.options.options.port)
    tornado.ioloop.IOLoop.instance().start()


if __name__ == "__main__":
    main()
Please suggest if I am doing something wrong and how to fix this.
Advertisement
Answer
This is a known issue in google-cloud-python as well as gcloud-python: https://github.com/googleapis/google-cloud-python/issues/5570.
I dropped the library, and since then I’ve been calling the Google API directly.
As a side note, you are using the synchronous API, but to get the benefit of Tornado (or any asynchronous framework) you should use asynchronous libraries and calls, such as google-cloud-python’s Asynchronous Recognition.