February 23, 2019
I wanted to create docs for a course I enrolled in, to help me with future reference. The course offers subtitles, and I didn’t want to search through the subtitles and then copy-paste content into the docs, so I decided to convert the subtitle files into text files and then copy the whole text into MkDocs (which is my reference documentation).
Code:
| """ | |
| Creates readable text file from SRT file. | |
| """ | |
| import re, sys | |
| import os | |
def clean_up(lines):
    """
    Strip SRT bookkeeping lines and return only the subtitle text lines.

    A line is treated as a cue index when its stripped content equals the
    next expected cue number (1, 2, 3, ...). The index line and the single
    line that follows it (the timestamp line) are dropped, as are blank
    lines. All remaining lines are returned unchanged, in order. Text that
    spans several lines is NOT merged; each line is kept as its own entry.

    Args:
        lines: Iterable of strings, e.g. the result of ``file.readlines()``.

    Returns:
        A new list containing only the subtitle text lines, with their
        original line endings preserved.
    """
    srt_count = 1
    new_lines = []
    escape_timestamp = False
    for line in lines:
        # A leading byte-order mark would otherwise hide the very first
        # cue index ("\ufeff1" != "1") and stall the counter for the whole
        # file, so ignore it when classifying the line.
        stripped = line.lstrip('\ufeff').strip()
        if stripped == str(srt_count):
            # Cue index: drop it and remember that the next line is the
            # timestamp belonging to this cue.
            srt_count += 1
            escape_timestamp = True
        elif escape_timestamp:
            # The line right after a cue index is the timestamp; drop it.
            escape_timestamp = False
        elif not stripped:
            # Blank separator line, including "\r\n" and whitespace-only.
            continue
        else:
            new_lines.append(line)
    return new_lines
def main(args):
    """
    Walk a folder of subtitle files and write a cleaned copy of each file.

    For every directory visited under the source folder, a directory named
    after that directory's last path component is created under the
    destination folder. Each file is read, passed through clean_up(), and
    written there under its original file name.

    Args:
        args: Command-line argument list in ``sys.argv`` form. ``args[1]``,
            when present, overrides the source folder and ``args[2]``, when
            present, overrides the destination folder. With no extra
            arguments the original hard-coded paths are used.
    """
    args = args or []
    source_root = (args[1] if len(args) > 1
                   else '/Users/vinkrish/Documents/Intro to Statistics Subtitles')
    target_root = (args[2] if len(args) > 2
                   else '/Users/vinkrish/Documents/extracted')

    # macOS Finder metadata files; '.DS_Store.txt' covers a folder whose
    # files were bulk-renamed to .txt before running the script.
    ignored_names = ('.DS_Store', '.DS_Store.txt')

    for dirpath, _dirnames, filenames in os.walk(source_root):
        # Only the last path component is kept, so nested folders are
        # flattened directly under the destination folder.
        tail_dir = os.path.basename(os.path.normpath(dirpath))
        new_dir = os.path.join(target_root, tail_dir)
        if not os.path.exists(new_dir):
            try:
                # makedirs also creates the destination root if it is missing.
                os.makedirs(new_dir)
            except OSError:
                print ("Creation of the directory failed")
                # Nothing can be written here; move on to the next folder
                # instead of crashing when opening the output file.
                continue
        for file_name in filenames:
            if file_name in ignored_names:
                continue
            print(file_name)
            with open(os.path.join(dirpath, file_name)) as f:
                lines = f.readlines()
            print(len(lines))
            new_lines = clean_up(lines)
            with open(os.path.join(new_dir, file_name), 'w') as new_file:
                new_file.writelines(new_lines)
# Script entry point: run the extraction only when executed directly,
# passing the raw command-line argument list through to main().
if __name__ == '__main__':
    main(sys.argv)
| """ | |
| NOTES | |
| * Run from command line as | |
| ** python subtitle-extract.py | |
| * Creates file_name.txt with extracted text from file_name.srt | |
| """ |