SyntaxHighlighter

2013年3月11日月曜日

[Python]tsファイルのエンコードスクリプト

溜まったTSをいちいちエンコードするのも面倒なので、pythonの練習も兼ねてスクリプトを作成してみました。
例外処理とか色々荒削りな部分もありますが、とりあえず動けばいいやレベルで。

以下、スクリプト全体。





# -*- coding: utf-8 -*-
'''
Created on 2013/03/11

2013/03/11
  - File was created.

@author: Juntaro Minezaki
'''

import datetime
import json
import os
import platform
import shutil
import subprocess
import sys
import threading
import time
import unicodedata

# Platform-specific configuration.  Every supported branch defines the same
# set of module-level settings; any other platform terminates the script.
if platform.system() == 'Linux':
    print('Linux OS. Using utf-8.')
    import pexpect  # only imported (and only used) on Linux
    SYSTEM = 'Linux'
    OS_ENCODING = 'utf-8'
    ENCODER_PATH = u'mencoder'
    # os.path.exists() and open() never expand "~" themselves (only a shell
    # does), so the home-relative paths are expanded once here.
    TASKLIST_PATH = os.path.expanduser(u'~/tasklist.json')
    WAIT_FOLDER = os.path.expanduser(u'~/Wait')
    PROCESS_FOLDER = os.path.expanduser(u'~/Wait')
    TV_FOLDER = os.path.expanduser(u'~/tv')
    MAX_THREAD = 4
elif platform.system() == 'Windows':
    print('Windows OS. Using cp932')
    SYSTEM = 'Windows'
    OS_ENCODING = 'cp932'
    # The values below are placeholders to be filled in by the user.
    ENCODER_PATH = u'  '   # full path to mencoder.exe
    TASKLIST_PATH = u'  '  # path of the file used to save progress
    WAIT_FOLDER = u'  '    # folder holding the files to encode
    PROCESS_FOLDER = u'  ' # not used for processing yet (main() only checks that it exists)
    TV_FOLDER = u'  '      # folder that receives the encoded files
    MAX_THREAD = 3         # number of encodes running at the same time
else:
    print('Unsupported OS. Exit.')
    sys.exit()

# Upper bound on the number of entries kept in the 'completed' list.
MAX_COMPLETED = 100

# mencoder options for the two xvid passes (pass 1 has no audio).
FIRST_PASS_OPTION = u'-nosound -ovc xvid -xvidencopts pass=1:bitrate=2000:threads=1:me_quality=1: -vf yadif=3,pp=l5,framestep=2,scale=1280:720 -ofps 30000/1001'
SECOND_PASS_OPTION = u'-oac mp3lame -lameopts abr:br=192 -ovc xvid -xvidencopts pass=2:bitrate=2000:threads=1 -vf yadif=3,pp=l5,framestep=2,scale=1280:720 -ofps 30000/1001'

class TaskList():
    '''Bookkeeping for encode jobs, kept in three lists.

    self.tasklist maps 'wait', 'process' and 'completed' to lists of dicts.
    Every entry has 'path' and 'date' keys; entries in 'process' also carry
    'pass' and 'progress'.  The structure is plain JSON data so that save()
    and load() can write it out and read it back.
    '''

    def __init__(self):
        self.tasklist = {'wait': [], 'process': [], 'completed': []}

    def __has_path(self, state, path):
        '''Return True if the list named state holds an entry for path.'''
        return any(element.get('path', u'') == path
                   for element in self.tasklist[state])

    def __add_file_to_wait(self, path):
        '''Queue path unless it is already present in any of the lists.

        The entry is stamped with the file's modification time and the wait
        list is kept sorted by that stamp (oldest first).
        '''
        for state in ('wait', 'process', 'completed'):
            if self.__has_path(state, path):
                return
        try:
            mtime = os.stat(path).st_mtime
        except OSError:
            # The file disappeared after it was discovered: nothing to queue.
            return
        moddate = datetime.datetime.fromtimestamp(mtime)
        self.tasklist['wait'].append({
            'date': moddate.strftime('%Y-%m-%d %H:%M:%S'),
            'path': path,
        })
        self.tasklist['wait'].sort(key=lambda x: x['date'])

    def __delete_wait(self, path):
        '''Remove the wait entry for path; return True if one was removed.'''
        for element in self.tasklist['wait']:
            if element.get('path', u'') == path:
                self.tasklist['wait'].remove(element)
                return True
        return False

    def __delete_old_completed(self):
        '''Drop the oldest completed entries beyond MAX_COMPLETED.'''
        excess = len(self.tasklist['completed']) - MAX_COMPLETED
        if excess > 0:
            del self.tasklist['completed'][:excess]
        return

    def __add_process(self, path):
        '''Append a fresh 'process' entry for path, stamped with the current time.'''
        current = datetime.datetime.now()
        self.tasklist['process'].append({
            'path': path,
            'date': current.strftime('%Y-%m-%d %H:%M:%S'),
            'pass': 0,
            'progress': 0,
        })
        return True

    def __delete_process(self, path):
        '''Remove the process entry for path; return True if one was removed.'''
        for element in self.tasklist['process']:
            if element.get('path', u'') == path:
                self.tasklist['process'].remove(element)
                return True
        return False

    def __add_completed(self, path):
        '''Record path as completed (stamped with now) and trim old entries.'''
        current = datetime.datetime.now()
        self.tasklist['completed'].append({
            'path': path,
            'date': current.strftime('%Y-%m-%d %H:%M:%S'),
        })
        self.__delete_old_completed()
        return True

    def load(self, src):
        '''Merge the task list stored in the JSON file src into this object.

        A missing, unreadable or malformed file is treated as an empty task
        list.  Saved 'process' and 'wait' entries whose file still exists are
        put (back) on the wait list; saved 'completed' entries are appended
        unless already present.
        '''
        try:
            with open(src, 'r') as f:
                importtask = json.loads(f.read())
        except (IOError, OSError, ValueError):
            importtask = {}
        if not isinstance(importtask, dict):
            importtask = {}
        # Provisional: jobs that were in progress when the list was saved are
        # simply queued again from scratch.
        for state in ('process', 'wait'):
            for entry in importtask.get(state, []):
                path = entry.get('path', u'')
                if not os.path.exists(path):
                    continue
                if self.__has_path('process', path):
                    continue
                self.__add_file_to_wait(path)
        for completed in importtask.get('completed', []):
            if not self.__has_path('completed', completed.get('path', u'')):
                self.tasklist['completed'].append(completed)
        return

    def save(self, dest):
        '''Write the task list to dest as JSON; report (not raise) I/O errors.'''
        try:
            with open(dest, 'w') as f:
                f.write(json.dumps(self.tasklist))
        except IOError:
            print ('Failed to open Task List File.')
        return

    def add_folder_to_wait(self, folder):
        '''Queue every '.ts' file found in folder and prune vanished entries.

        Always returns True.
        '''
        for element in os.listdir(folder):
            (root, ext) = os.path.splitext(element)
            if ext == '.ts':
                self.__add_file_to_wait(os.path.join(folder, element))
        # Iterate over a copy: __delete_wait() mutates the wait list.
        for element in list(self.tasklist['wait']):
            path = element.get('path', u'')
            if not os.path.exists(path):
                self.__delete_wait(path)
        return True

    def move_wait_to_process(self, path):
        '''Move path from 'wait' to 'process'.

        Returns False (after dropping any wait entry) when the file no longer
        exists, and False when path was not on the wait list.
        '''
        if not os.path.exists(path):
            self.__delete_wait(path)
            return False
        if not self.__delete_wait(path):
            return False
        return self.__add_process(path)

    def move_process_to_completed(self, srcpath, destpath):
        '''Replace the process entry for srcpath by a completed entry for destpath.

        Returns False when srcpath was not on the process list.
        '''
        if not self.__delete_process(srcpath):
            return False
        return self.__add_completed(destpath)

    def get_highest_in_wait(self):
        '''Return the path at the head of the wait list, or u'' if it is empty.'''
        if not self.tasklist['wait']:
            return u''
        return self.tasklist['wait'][0].get('path', u'')

    def update_process(self, process):
        '''Copy 'pass' and 'progress' from process into the matching entry.

        process is a dict with 'path', 'pass' and 'progress' keys; nothing
        happens when no process entry has that path.
        '''
        path = process.get('path', u'')
        for element in self.tasklist['process']:
            if element.get('path', u'') == path:
                element['pass'] = process.get('pass', 0)
                element['progress'] = process.get('progress', 0)
                break

class EncodeThread(threading.Thread):
    '''Worker thread that runs the two-pass encode for one file at a time.

    The owner hands over a file with setpath(); run() notices it on its next
    poll, marks the thread busy and encodes.  When the encode is over,
    encodepass is 3 and iscompleted() is True until the owner calls
    clear_encode_process().
    '''

    def __init__(self, index):
        threading.Thread.__init__(self)
        self.index = index  # thread number
        self.free = True    # False from the moment run() picks a task up
        self.wait = False   # True once a path has been handed over
        self.path = u''     # file to encode
        self.encodepass = 0 # 0: idle, 1/2: running that pass, 3: finished
        self.progress = 0   # last percentage seen, -1 when not tracked

    def setpath(self, path):
        '''Hand path to this worker; it is picked up on the next poll of run().'''
        self.path = path
        self.wait = True

    def getpath(self):
        return self.path

    def isfree(self):
        return self.free

    def iswait(self):
        return self.wait

    def iscompleted(self):
        '''Return True when a picked-up task has gone past pass 2.'''
        return (not self.isfree()) and self.encodepass > 2

    def isnewtask(self):
        '''Return True when a path was handed over but not picked up yet.'''
        return self.isfree() == True and self.iswait() == True

    def getindex(self):
        return self.index

    def getprogress(self):
        return self.progress

    def getencodepass(self):
        return self.encodepass

    def getprocess(self):
        '''Return the current state as a dict with 'path', 'pass', 'progress'.'''
        return {'path': self.path,
                'pass': self.encodepass,
                'progress': self.progress}

    def clear_encode_process(self):
        '''Reset the worker to its idle state so it can take another file.'''
        self.free = True
        self.wait = False
        self.path = u''
        self.encodepass = 0
        self.progress = 0
        return

    def __update_encoder_progress(self, encoderthread):
        '''Block until the encoder prints a percentage or exits.

        encoderthread is the pexpect child running the encoder.  Returns
        False on EOF (the encoder finished) and True after self.progress was
        updated, so callers loop while the result is True.
        '''
        # Raw string: "\d" in a normal literal is an invalid escape sequence.
        cpl = encoderthread.compile_pattern_list([pexpect.EOF, r"(\d+)%"])
        match = encoderthread.expect_list(cpl, timeout=None)
        if match == 0:  # EOF
            return False
        if match == 1:
            # Take the digits from the capture group; int() accepts both
            # text and byte matches.  The child must stay open here: the
            # caller keeps reading from it until EOF.
            self.progress = int(encoderthread.match.group(1), 10)
        return True

    def __wait_for_move(self, path):
        '''Wait until path can be read, i.e. it is no longer being copied in.

        On Windows the file is polled once per second until it can be
        opened; elsewhere a fixed 300 second delay is used.
        '''
        if SYSTEM == 'Windows':
            while True:
                try:
                    with open(path, 'rb'):
                        pass
                    break
                except IOError:
                    time.sleep(1)
        else:
            # todo: no readiness check on this platform yet
            time.sleep(300)
        return

    def execute_encode_process(self):
        '''Run both encoder passes on self.path and set encodepass to 3.

        A missing source file is reported and also ends with encodepass 3,
        so that the owner releases the worker.
        '''
        if not os.path.exists(self.path):
            print('[%d]File not exist.' % self.index)
            print('   %s' % self.path)
            self.encodepass = 3
            return
        self.__wait_for_move(self.path)
        print("Start Encoding: %s" % self.path)
        (root, ext) = os.path.splitext(self.path)
        # Both passes share the pass log and write <root>.avi next to the source.
        first_cmd = (
                u'"%s" %s -passlogfile "%s.log" -o "%s.avi" "%s"' %
                (ENCODER_PATH, FIRST_PASS_OPTION, root, root, self.path)
            )
        second_cmd = (
                u'"%s" %s -passlogfile "%s.log" -o "%s.avi" "%s"' %
                (ENCODER_PATH, SECOND_PASS_OPTION, root, root, self.path)
            )
        if SYSTEM == 'Windows':
            # No progress parsing on Windows: progress stays at -1.
            self.encodepass = 1
            p = subprocess.Popen(first_cmd.encode(OS_ENCODING), shell=False)
            self.progress = -1
            p.wait()
            self.encodepass = 2
            p = subprocess.Popen(second_cmd.encode(OS_ENCODING), shell=False)
            self.progress = -1
            p.wait()
        elif SYSTEM == 'Linux':
            self.encodepass = 1
            p = pexpect.spawn(first_cmd.encode(OS_ENCODING))
            while self.__update_encoder_progress(p) == True:
                pass
            self.encodepass = 2
            p = pexpect.spawn(second_cmd.encode(OS_ENCODING))
            while self.__update_encoder_progress(p) == True:
                pass
        else:
            # todo: unsupported system
            pass
        self.encodepass = 3
        return

    def run(self):
        '''Poll every 30 seconds for a handed-over file and encode it.'''
        while True:
            if self.isnewtask() == True:
                self.free = False
                self.execute_encode_process()
            time.sleep(30)

class Mover():
    '''Move an encoded .avi into the library folder and clean up its source.'''

    def __init__(self):
        pass

    def __sortlistbylength (self, intr_list):
        '''Sort intr_list in place, longest string first, and return it.

        The sort is stable, so equally long names keep their relative order.
        '''
        intr_list.sort(key=len, reverse=True)
        return intr_list

    def __getfolderlist(self, dir):
        '''Return the names of the sub-folders of dir, longest name first.'''
        folder_list = [elem for elem in os.listdir(dir)
                       if os.path.isdir(os.path.join(dir, elem))]
        return self.__sortlistbylength(folder_list)

    def __getdest(self, src, destdir, folderlist):
        '''Pick the destination folder for src.

        Returns the first sub-folder of destdir (in folderlist order) whose
        NFKC-normalised name occurs in the NFKC-normalised file name of src,
        or destdir itself when none matches.
        '''
        norm_file_name = unicodedata.normalize('NFKC', os.path.basename(src))
        for elem in folderlist:
            if unicodedata.normalize('NFKC', elem) in norm_file_name:
                return os.path.join(destdir, elem)
        return destdir

    def __moveavi(self, src, destdir):
        '''Move src into destdir (or a matching sub-folder).

        Returns the path where the file is located afterwards; this is src
        itself when the source or destination is missing or the move fails.
        '''
        if not os.path.exists(src):
            print (u'Source file(%s) was not exist.' % (src))
            return src
        if not os.path.exists(destdir):
            print (u'Destination folder(%s) was not exist.' % (destdir))
            return src
        folderlist = self.__getfolderlist(destdir)
        dest = self.__getdest(src, destdir, folderlist)
        if os.path.dirname(src) != dest:
            try:
                # todo: check for an existing file of the same name; appending
                # a timestamp to duplicates might be a good way to handle it.
                shutil.move(src, dest)
            except (IOError, OSError, shutil.Error) as err:
                # Report the real location instead of a path that does not exist.
                print (u'Failed to move %s: %s' % (src, err))
                return src
        return os.path.join(dest, os.path.basename(src))

    def move(self, src, destdir):
        '''Move the .avi encoded from src into destdir and remove leftovers.

        src is the original source file; the encoded file and the pass log
        are expected next to it with '.avi' and '.log' extensions.  The log
        and the source are deleted only when the encoded file really exists
        at the returned location, so a failed encode never costs the source.

        Returns the path of the encoded file (its expected path when the
        encode produced nothing).
        '''
        (root, ext) = os.path.splitext(src)
        srcavi = root + u'.avi'
        srclog = root + u'.log'
        print(srcavi)
        moved_path = self.__moveavi(srcavi, destdir)
        if not os.path.exists(moved_path):
            print (u'Encoded file is missing; keeping %s' % (src))
            return moved_path
        # Remove each leftover independently: a missing log must not prevent
        # the source from being cleaned up.
        for leftover in (srclog, src):
            try:
                os.remove(leftover)
            except OSError:
                pass
        return moved_path
        

def main():
    '''Check the configuration, start the workers and schedule files forever.

    Every 30 seconds each worker is inspected: finished encodes are moved to
    TV_FOLDER and recorded as completed, busy workers have their progress
    copied into the task list, and idle workers are handed the oldest
    waiting file.  The task list is saved after every round.
    '''
    if (SYSTEM == 'Windows' and os.path.exists(ENCODER_PATH) != True):
        print('Encoder was not found.')
        sys.exit()
    if (os.path.exists(WAIT_FOLDER) != True):
        print('Wait folder was not found.')
        sys.exit()
    if (os.path.exists(PROCESS_FOLDER) != True):
        print('Process folder was not found.')
        sys.exit()

    tasklist = TaskList()
    tasklist.load(TASKLIST_PATH)
    tasklist.add_folder_to_wait(WAIT_FOLDER)

    mover = Mover()

    threads = []
    for i in range(MAX_THREAD):
        t = EncodeThread(i)
        threads.append(t)
        t.start()

    while True:
        for t in threads:
            if not t.isfree():
                if t.iscompleted():
                    path = t.getpath()
                    tasklist.move_process_to_completed(path, mover.move(path, TV_FOLDER))
                    t.clear_encode_process()
                tasklist.update_process(t.getprocess())
                continue
            if t.iswait():
                # The worker was handed a file but has not picked it up yet
                # (it polls on its own 30 second timer).  It still reports
                # isfree(), so without this guard a second file would
                # overwrite the first one, which would then stay in
                # 'process' without ever being encoded.
                continue
            tasklist.add_folder_to_wait(WAIT_FOLDER)
            path = tasklist.get_highest_in_wait()
            if tasklist.move_wait_to_process(path) == True:
                t.setpath(path)
        tasklist.save(TASKLIST_PATH)
        time.sleep(30)

# Run the scheduler only when this file is executed as a script.
if __name__ == '__main__':
    main()


以下、ざっくりと処理の内容。

    # Create and start MAX_THREAD workers.
    threads = []
    for i in range(MAX_THREAD):
        t = EncodeThread(i)
        threads.append(t)
        t.start()

    # Scheduling loop: one round over all workers every 30 seconds.
    while True:
        for t in threads:
            if (t.isfree() == False):
                # Busy worker: finish it off if done, then record its state.
                #print ('[%d] Pass %d, Progress %d' % (t.getindex(), t.getencodepass(), t.getprogress()))
                if (t.iscompleted() == True):
                    #print ('[%d] Completed' % t.getindex())
                    path = t.getpath()
                    tasklist.move_process_to_completed(path, mover.move(path, TV_FOLDER))
                    t.clear_encode_process()
                    #print(json.dumps(tasklist.tasklist, sort_keys=True, indent=2))
                tasklist.update_process(t.getprocess())
                continue
            # Free worker: rescan the wait folder and hand over the head of
            # the wait list, if there is one.
            tasklist.add_folder_to_wait(WAIT_FOLDER)
            path = tasklist.get_highest_in_wait()
            if tasklist.move_wait_to_process(path) == True:
                t.setpath(path)
        # Persist the task list after every round.
        tasklist.save(TASKLIST_PATH)
        time.sleep(30)

エンコード用のクラスEncodeThreadを指定スレッド数分作成。
各スレッドで
 プロセス追加→エンコードプロセス開始→プロセス監視→エンコードプロセス終了→ファイル移動→プロセス完了処理
を繰り返すようにしています。


        # Windows: run each pass with subprocess and wait for it to exit;
        # progress is set to -1 because the output is not parsed.
        if(SYSTEM == 'Windows'):
            self.encodepass = 1
            p = subprocess.Popen(first_cmd.encode(OS_ENCODING), shell=False)
            self.progress = -1
            p.wait()
            self.encodepass = 2
            p = subprocess.Popen(second_cmd.encode(OS_ENCODING), shell=False)
            self.progress = -1
            p.wait()
        # Linux: run each pass under pexpect and keep polling its output
        # until __update_encoder_progress() reports EOF by returning False.
        elif(SYSTEM == 'Linux'):
            self.encodepass = 1
            p = pexpect.spawn(first_cmd.encode(OS_ENCODING))
            while (self.__update_encoder_progress(p) == True):
                pass
            self.encodepass = 2
            p = pexpect.spawn(second_cmd.encode(OS_ENCODING))
            while (self.__update_encoder_progress(p) == True):
                pass

エンコードプロセスは、OS種類で投げ方、監視の仕方を変えました。
Windowsでpexpectが使えればよかったのですが、pexpect相当のwexpectが上手く使えなかったのでsubprocessモジュールで我慢してます…。

    def __update_encoder_progress(self, encoderthread):
        '''Block until the encoder prints a percentage or exits.

        encoderthread is the pexpect child running the encoder.  Returns
        False on EOF and True after self.progress was updated, so callers
        loop while the result is True.
        '''
        # Raw string: "\d" in a normal literal is an invalid escape sequence.
        cpl = encoderthread.compile_pattern_list([pexpect.EOF, r"(\d+)%"])
        match = encoderthread.expect_list(cpl, timeout=None)
        if match == 0: #EOF
            return False
        if match == 1:
            # Take the digits from the capture group; int() accepts both
            # text and byte matches.  The child must stay open here: the
            # caller keeps reading from it until EOF.
            self.progress = int(encoderthread.match.group(1), 10)
        return True

Linux OSの場合はここを参考にpexpect.spawnで出力からエンコードの進捗を取得しています。

    def __moveavi(self, src, destdir):
        '''Move src into destdir (or a matching sub-folder).

        Returns the path where the file is located afterwards; this is src
        itself when the source or destination is missing or the move fails.
        '''
        if not os.path.exists(src):
            print (u'Source file(%s) was not exist.' % (src))
            return src
        if not os.path.exists(destdir):
            print (u'Destination folder(%s) was not exist.' % (destdir))
            return src
        folderlist = self.__getfolderlist(destdir)
        dest = self.__getdest(src, destdir, folderlist)
        if os.path.dirname(src) != dest:
            try:
                # todo: check for an existing file of the same name; appending
                # a timestamp to duplicates might be a good way to handle it.
                shutil.move(src, dest)
            except (IOError, OSError, shutil.Error) as err:
                # Report the real location instead of a path that does not exist.
                print (u'Failed to move %s: %s' % (src, err))
                return src
        return os.path.join(dest, os.path.basename(src))


エンコードプロセス完了後は、Moverクラスを使用してエンコード済みファイルを指定のフォルダへ移動させます。
その際、ファイル名の一部がフォルダ内のサブフォルダ名と一致するかを__getdest()内で判定し、該当するサブフォルダがある場合はそちらを優先して移動先とさせました。



エンコード状況を確認できるように、TaskListクラスを使用して進捗をJSONファイルに出力しています。
JSONファイルの閲覧用スクリプトは、また後日投稿していきたいと考えています。


なんとなくで作ってはみたものの、普段はC言語くらいしか触れていないため、クラスを上手く使えていません。
こればかりは少しずつ学習して、慣れていかないとですね。



0 件のコメント:

コメントを投稿