mirror of https://github.com/chubin/wttr.in
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
75 lines
2.9 KiB
75 lines
2.9 KiB
import os
|
|
|
|
def remove_colon_and_strip_from_str(line):
|
|
"""
|
|
Removes the colon from the line and strips the line.
|
|
"""
|
|
return line.replace(":", "").strip()
|
|
|
|
def print_result_for_file(file_path, file_name, duplicate_entries):
|
|
"""
|
|
Prints the result for a given file.
|
|
"""
|
|
print(f"-" * 50)
|
|
print(f"Processing file: {file_name} \n")
|
|
# keep entries with more than one occurence
|
|
if len(duplicate_entries) > 0:
|
|
for key, value in duplicate_entries.items():
|
|
# prints debug info for each duplicate found
|
|
print(f"{file_path}: \"{key}\" appears in lines {", ".join(map(str, value))}")
|
|
else:
|
|
# prints debug info, if no duplicates found 🥳
|
|
print(f"No duplicates found!")
|
|
|
|
def find_duplicates(directory, debug=False):
|
|
"""
|
|
Reads all .txt files in a given directory and tries to detect duplicate entries.
|
|
"""
|
|
try:
|
|
language_lookup_table = {}
|
|
files = [f for f in os.listdir(directory) if f.endswith('.txt')]
|
|
files.sort()
|
|
|
|
if not files:
|
|
print("No .txt files found in the directory.")
|
|
return
|
|
|
|
for file_name in files:
|
|
# if file_name contains "-help" skip it for now!
|
|
if "-help" in file_name:
|
|
continue
|
|
|
|
file_path = os.path.join(directory, file_name)
|
|
try:
|
|
with open(file_path, 'r', encoding='utf-8') as file:
|
|
lookup_table = {}
|
|
for line_number, line in enumerate(file, start=1):
|
|
stripped_line = line.strip()
|
|
stripped_keywords = stripped_line.split(":")
|
|
stripped_keywords = list(map(remove_colon_and_strip_from_str, stripped_keywords))
|
|
trimmed_keywords = list(map(str.strip, stripped_keywords))
|
|
|
|
for tk in trimmed_keywords:
|
|
if tk == "" or tk.isdigit():
|
|
continue
|
|
if tk in lookup_table:
|
|
lookup_table[tk].append(line_number)
|
|
else:
|
|
lookup_table[tk] = [line_number]
|
|
duplicate_entries = {k: v for k, v in lookup_table.items() if len(v) > 1}
|
|
print_result_for_file(file_path, file_name, duplicate_entries)
|
|
language_lookup_table[file_name] = duplicate_entries
|
|
except Exception as e:
|
|
print(f"An error occurred while processing the file: {e}")
|
|
|
|
return language_lookup_table
|
|
except Exception as e:
|
|
print(f"An error occurred: {e}")
|
|
return None
|
|
|
|
# Example usage from the root directory:
|
|
# python ./lib/duplicate_translations.py
|
|
if __name__ == "__main__":
|
|
directory_path = "share/translations/"
|
|
find_duplicates(directory_path, debug=True)
|