104 lines
3.2 KiB
Text
104 lines
3.2 KiB
Text
|
#!/usr/bin/env python3
|
||
|
import re
|
||
|
import os
|
||
|
import nltk
|
||
|
from nltk.corpus import words
|
||
|
from nltk.corpus import wordnet
|
||
|
|
||
|
# Load the NLTK "words" corpus, fetching it only when it is not installed.
try:
    word_list = words.words()
except LookupError:
    nltk.download('words')
    word_list = words.words()

# Make sure WordNet is available too. Checking first avoids contacting the
# NLTK download server on every run when the corpus is already present.
try:
    wordnet.ensure_loaded()
except LookupError:
    nltk.download('wordnet')
|
||
|
|
||
|
# Lower-cased vocabulary for O(1), case-insensitive membership tests.
word_set = {entry.lower() for entry in word_list}

# Suffixes that process_word() tries to peel off the end of an unknown
# token. They are tried in this order, so the order is significant.
common_words = [
    'and', 'in', 'the', 'of', 'to',
    'at', 'by', 'for', 'with', 'from',
]

# Words that is_word() accepts without consulting either corpus.
always_valid = {
    'the', 'a', 'an',
    'and', 'or', 'but', 'nor', 'for', 'yet', 'so',
}
|
||
|
|
||
|
def is_word(word):
    """Return True when ``word`` should be treated as a dictionary word.

    A word whose lower-cased form is in ``always_valid`` is accepted
    immediately. Any other word must be present in ``word_set`` (compared
    lower-cased) and also have at least one WordNet synset. The decision
    is printed as a trace line in both cases.
    """
    lowered = word.lower()

    # Fast path: short function words are accepted without a corpus lookup.
    if lowered in always_valid:
        print(f" Checking if '{word}' is in dictionary: True (common word)")
        return True

    listed = lowered in word_set
    has_synsets = len(wordnet.synsets(word)) > 0
    verdict = listed and has_synsets
    print(
        f" Checking if '{word}' is in dictionary: {verdict} "
        f"(words:{listed}, wordnet:{has_synsets})"
    )
    return verdict
|
||
|
|
||
|
def process_word(word):
    """Insert spaces into a single token where word boundaries are detected.

    The steps, in order:

    1. If ``is_word(word)`` is true, the token is returned unchanged.
    2. Otherwise, if the token ends with one of ``common_words`` (compared
       case-insensitively, first match in list order wins) and has
       something in front of that suffix, the suffix is split off and the
       remainder is processed recursively.
    3. Otherwise, if the token ends in letters followed by digits and the
       letter run is a dictionary word, a space is inserted before the
       digits. Any characters preceding the letter run are kept.
    4. Otherwise the token is returned unchanged.

    Every step prints a trace line.

    Returns:
        The token with zero or more spaces inserted; no characters of the
        input are dropped.
    """
    print(f"\nProcessing word: '{word}'")

    if is_word(word):
        print(f" '{word}' is in dictionary, returning as-is")
        return word

    print(f" '{word}' not in dictionary, checking for common words at end...")
    lowered = word.lower()
    for common in common_words:
        # Require a non-empty remainder: a token that *is* the common word
        # (e.g. "of") must not be split into "" + "of", which would yield
        # a result with a stray leading space.
        if len(word) > len(common) and lowered.endswith(common):
            print(f" Found '{common}' at end of '{word}'")
            remainder = word[:-len(common)]
            # Slice from the original token so its casing is preserved.
            common_case = word[-len(common):]
            print(f" Recursively processing remainder: '{remainder}'")
            return f"{process_word(remainder)} {common_case}"

    print(f" No common words found at end of '{word}'")

    match = re.search(r'([a-zA-Z]+)(\d+)$', word)
    if match:
        text, num = match.groups()
        print(f" Found number at end: '{text}' + '{num}'")
        if is_word(text):
            # re.search may start matching in the middle of the token
            # (e.g. "v2test3" matches "test" + "3"); keep whatever came
            # before the match so no characters are lost from the name.
            prefix = word[:match.start()]
            return f"{prefix}{text} {num}"

    print(f" Returning '{word}' unchanged")
    return word
|
||
|
|
||
|
def split_filename(filename):
    """Return ``filename`` with the words in its base name space-separated.

    The extension (as determined by ``os.path.splitext``) is kept as-is.
    The base name is split on underscores, hyphens and whitespace; each
    remaining piece is split before every capital letter except a leading
    one, and each resulting token is passed through ``process_word``.
    Runs of whitespace in the result are collapsed to single spaces and
    leading/trailing whitespace is removed.

    If the base name consists only of delimiters (so nothing would remain
    before the extension), the original ``filename`` is returned unchanged.

    Progress is printed as trace lines.
    """
    print(f"\nProcessing filename: {filename}")
    base, ext = os.path.splitext(filename)

    print("Splitting on delimiters...")
    # The capture group keeps the delimiters in the result list so they
    # can be turned into spaces below.
    parts = re.split(r'([_\-\s])', base)

    result = []
    for part in parts:
        # Delimiters (and the empty strings re.split yields between
        # adjacent delimiters) become a space; they never need a
        # dictionary lookup.
        if not part or part in ('_', '-') or part.isspace():
            result.append(' ')
        else:
            print(f"\nSplitting on capitals: {part}")
            # Zero-width split before each capital that is not the first
            # character of the piece.
            pieces = re.split(r'(?<!^)(?=[A-Z])', part)
            print(f"Got words: {pieces}")
            processed = [process_word(piece) for piece in pieces]
            result.append(' '.join(processed))

    # Collapse repeated whitespace and trim both ends.
    final = ' '.join(''.join(result).split())
    if not final:
        # Nothing but delimiters: renaming to just the extension (or to an
        # empty string) would be worse than leaving the name alone.
        return filename
    return final + ext
|
||
|
|
||
|
def main():
    """Rename every non-hidden regular file in the current directory.

    Each file name is passed through ``split_filename``; when the result
    differs from the current name the file is renamed. A rename is skipped
    if the target name already exists, so that an existing file is never
    overwritten. Progress and errors are printed; ``OSError`` raised by the
    rename is reported and does not stop the run.
    """
    # Get all files in current directory
    files = [f for f in os.listdir('.') if os.path.isfile(f)]

    for filename in files:
        if filename.startswith('.'):  # Skip hidden files
            continue

        print(f"\n{'='*50}")
        print(f"Original: {filename}")
        new_name = split_filename(filename)
        print(f"New name: {new_name}")

        if new_name == filename:
            continue

        # os.rename() silently replaces an existing destination on POSIX,
        # so two files that normalise to the same name (e.g. "my_file.txt"
        # and "my-file.txt") would otherwise destroy one another.
        if os.path.lexists(new_name):
            print(f"Skipping {filename}: {new_name} already exists")
            continue

        try:
            os.rename(filename, new_name)
        except OSError as e:
            print(f"Error renaming {filename}: {e}")
        else:
            print(f"Renamed: {filename} -> {new_name}")
|
||
|
|
||
|
# Run the renamer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|
||
|
|