-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- add lf-utf8.py to enforce UTF-8 encoding, LF line ending, and new line at the end of the text file - add clang-format.py to run clang-format on every single C/C++ text file
- Loading branch information
0 parents
commit f524bce
Showing
4 changed files
with
331 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
* text=auto eol=lf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
#!/usr/bin/env python3 | ||
# encoding=utf-8 | ||
|
||
import os | ||
import subprocess | ||
import posixpath | ||
import threading | ||
from typing import List | ||
|
||
|
||
class Encoding:
    """Codec names this script knows about, plus a best-effort detector."""

    UTF8 = 'utf-8'
    UTF8_WITH_BOM = 'utf-8-sig'
    UTF16 = 'utf-16'
    GB2312 = 'gb2312'
    SHIFT_JIS = 'shift-jis'

    @classmethod
    def decode(cls, bs: bytes):
        """Decode *bs* with the first candidate codec that accepts it.

        Candidates are tried in this order: UTF-8 with BOM (only when the
        input really starts with the UTF-8 BOM), UTF-8, UTF-16, GB2312 and
        Shift-JIS.

        Args:
            bs: Raw bytes to decode.

        Returns:
            A ``(encoding, text)`` tuple on success, where ``encoding`` is
            one of the class constants. ``(None, bs)`` when no candidate
            fits or when *bs* is not a bytes-like object with ``decode``.
        """
        # Non-bytes input cannot be decoded; report "unknown" instead of
        # raising so callers can fall back to their own handling.
        if not isinstance(bs, (bytes, bytearray)):
            return None, bs

        candidates = [cls.UTF8, cls.UTF16, cls.GB2312, cls.SHIFT_JIS]
        # The 'utf-8-sig' codec also accepts input WITHOUT a BOM, so trying
        # it unconditionally would label every UTF-8 file as "with BOM" and
        # plain 'utf-8' would never be reported. Only offer it when the BOM
        # is actually present.
        if bs.startswith(b'\xef\xbb\xbf'):
            candidates.insert(0, cls.UTF8_WITH_BOM)

        for encoding in candidates:
            try:
                return encoding, bs.decode(encoding)
            except (UnicodeError, LookupError):
                # UnicodeError: the bytes are not valid in this codec.
                # LookupError: the codec is missing from this Python build.
                continue

        return None, bs
|
||
|
||
# Lower-case file extensions that are handed to clang-format.
C_SOURCE_EXTENSIONS = [
    '.h',
    '.c',
    '.cc',
]


def is_c_source_file(path: str):
    """Tell whether *path* ends with one of ``C_SOURCE_EXTENSIONS``.

    The extension is compared case-insensitively.

    Args:
        path: File path to inspect.

    Returns:
        True when the extension is listed, False otherwise.
    """
    suffix = posixpath.splitext(path)[1]
    return suffix.lower() in C_SOURCE_EXTENSIONS
|
||
|
||
def convert_string(bytes_input: bytes):
    """Turn raw bytes into text using ``Encoding.decode``.

    Args:
        bytes_input: Raw bytes, e.g. captured process output.

    Returns:
        The decoded text, or ``str(bytes_input)`` when ``Encoding.decode``
        reports no matching encoding.
    """
    detected, text = Encoding.decode(bytes_input)
    return text if detected is not None else str(bytes_input)
|
||
|
||
class Command:
    """Run an external command in a worker thread with a timeout.

    After ``run()`` returns, ``stdout`` / ``stderr`` hold the captured bytes
    (or None if the process never started), ``terminated`` tells whether the
    process was stopped for exceeding the timeout, and ``error`` holds the
    exception raised while starting or reading the process, if any.
    """

    def __init__(self, cmd: List[str]):
        self.cmd = cmd

        # Legacy attribute that nothing in this class reads; kept so code
        # that looks at ``.p`` keeps working.
        self.p = None
        # The subprocess.Popen handle; stays None until target() has
        # successfully started the process.
        self.process = None

        # The process is terminated by us because it took too long.
        # If this flag is True then the output is broken.
        self.terminated = False
        self.stdout = None
        self.stderr = None
        # Exception raised inside the worker thread, if any.
        self.error = None

    def target(self):
        """Start the process and block until its output has been collected."""
        print('>', ' '.join(self.cmd))
        self.process = subprocess.Popen(
            self.cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        self.stdout, self.stderr = self.process.communicate()

    def _stderr_text(self):
        """Return the captured stderr as text, or '' when nothing was captured."""
        if not self.stderr:
            return ''
        return convert_string(self.stderr)

    def run(self, timeout=5, raise_on_error=True):
        """Execute the command, waiting at most *timeout* seconds.

        Args:
            timeout: Seconds to wait before the process is terminated.
            raise_on_error: When True, raise on start-up failure, timeout
                or non-zero exit code. When False, the caller inspects
                ``error``, ``terminated`` and ``process.returncode``.

        Raises:
            Exception: see *raise_on_error*.
        """
        def guarded_target():
            # Keep worker-thread failures (e.g. executable not found) so the
            # calling thread can report them, instead of dying silently.
            try:
                self.target()
            except Exception as ex:
                self.error = ex

        thread = threading.Thread(target=guarded_target)
        thread.start()
        thread.join(timeout)

        if thread.is_alive():
            self.terminated = True
            # The handle may still be missing if Popen has not returned yet.
            if self.process is not None:
                self.process.terminate()
            # TODO Will call block our main thread for a long time?
            thread.join()

        if not raise_on_error:
            return

        command_line = ' '.join(self.cmd)

        if self.error is not None:
            raise Exception(
                f'Failed to start or read the process: {command_line}\n'
                f'{self.error}'
            ) from self.error

        if self.terminated:
            raise Exception(
                f'The process is terminated because it took too long to execute!\n'
                f'{self._stderr_text()}'
            )

        if self.process.returncode != 0:
            raise Exception(
                f'The process exited with code {self.process.returncode}: '
                f'{command_line}\n'
                f'{self._stderr_text()}'
            )
|
||
|
||
if __name__ == '__main__':
    # Format every tracked C source file in place with clang-format.
    #
    # '-z' makes git emit NUL-separated, unquoted paths; without it paths
    # containing non-ASCII characters are printed quoted/escaped and cannot
    # be opened as-is.
    completed_process = subprocess.run(
        ['git', 'ls-files', '-z'],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if completed_process.returncode != 0:
        raise SystemExit(convert_string(completed_process.stderr))

    tracked_paths = completed_process.stdout.decode('utf-8').split('\0')
    filepaths = [
        path for path in tracked_paths
        if len(path) > 0 and is_c_source_file(path)
    ]

    print(filepaths)
    for filepath in filepaths:
        # Tracked files may have been deleted from the working tree.
        if not os.path.isfile(filepath):
            continue

        with open(filepath, mode='rb') as infile:
            # Encoding.decode returns (encoding, content); only the content
            # is comparable with clang-format's output.
            _, file_content = Encoding.decode(infile.read())

        cmd = ['clang-format', '-style=file', filepath]
        sp = Command(cmd)
        sp.run()
        if sp.stdout is None:
            # Nothing was captured, so there is nothing safe to write back.
            continue

        clang_formatted_content = convert_string(sp.stdout)
        # TODO add 'check' or 'format' flags

        if file_content != clang_formatted_content:
            utf8_encoded_content = clang_formatted_content.encode('utf-8')
            with open(filepath, mode='wb') as outfile:
                outfile.write(utf8_encoded_content)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
#!/usr/bin/env python3 | ||
# encoding=utf-8 | ||
import os | ||
import mimetypes | ||
import traceback | ||
import subprocess | ||
from subprocess import PIPE | ||
|
||
from tqdm import tqdm | ||
|
||
|
||
class Encoding:
    """Codec names this script knows about, plus a best-effort detector."""

    UTF8 = 'utf-8'
    UTF8_WITH_BOM = 'utf-8-sig'
    UTF16 = 'utf-16'
    GB2312 = 'gb2312'
    SHIFT_JIS = 'shift-jis'

    @classmethod
    def decode(cls, bs: bytes):
        """Decode *bs* with the first candidate codec that accepts it.

        Candidates are tried in this order: UTF-8 with BOM (only when the
        input really starts with the UTF-8 BOM), UTF-8, UTF-16, GB2312 and
        Shift-JIS.

        Args:
            bs: Raw bytes to decode.

        Returns:
            A ``(encoding, text)`` tuple on success, where ``encoding`` is
            one of the class constants. ``(None, bs)`` when no candidate
            fits or when *bs* is not a bytes-like object with ``decode``.
        """
        # Non-bytes input cannot be decoded; report "unknown" instead of
        # raising so callers can fall back to their own handling.
        if not isinstance(bs, (bytes, bytearray)):
            return None, bs

        candidates = [cls.UTF8, cls.UTF16, cls.GB2312, cls.SHIFT_JIS]
        # The 'utf-8-sig' codec also accepts input WITHOUT a BOM, so trying
        # it unconditionally would label every UTF-8 file as "with BOM" and
        # a comparison against Encoding.UTF8 could never succeed. Only offer
        # it when the BOM is actually present.
        if bs.startswith(b'\xef\xbb\xbf'):
            candidates.insert(0, cls.UTF8_WITH_BOM)

        for encoding in candidates:
            try:
                return encoding, bs.decode(encoding)
            except (UnicodeError, LookupError):
                # UnicodeError: the bytes are not valid in this codec.
                # LookupError: the codec is missing from this Python build.
                continue

        return None, bs
|
||
|
||
# File or directory base names that are never visited.
skips = [
    '.git',  # git directory
    'logs',  # log directory
    'Backup',  # Visual Studio project migration files
    # known Visual Studio files
    'bin',
    'obj',
    '.vs',
]

# Lower-case file extensions that are never returned.
skip_extensions = [
    '.bomb',
    '.map',
    # Microsoft Excel files
    '.xls',
    # known binary extensions
    '.dll',
    '.jpg',
    '.gif',
    '.png',
    # weird files from Visual Studio
    '.suo',
]


def find_all_files(infile):
    """Collect file paths under *infile*, honouring the skip lists.

    Args:
        infile: Path of a file or directory to start from.

    Returns:
        A list of paths. A file yields itself unless its extension is in
        ``skip_extensions``; a directory yields the results of its entries,
        visited recursively. Anything whose base name is in ``skips``, or
        that is neither a file nor a directory, yields an empty list.
    """
    if os.path.basename(infile) in skips:
        return []

    if os.path.isfile(infile):
        suffix = os.path.splitext(infile)[1].lower()
        return [] if suffix in skip_extensions else [infile]

    if not os.path.isdir(infile):
        return []

    found = []
    for entry in os.listdir(infile):
        found += find_all_files(os.path.join(infile, entry))
    return found
|
||
|
||
def check_clang_format():
    """Placeholder with no behaviour yet; always returns None."""
    return None
|
||
|
||
if __name__ == '__main__':
    # Rewrite every tracked text file as UTF-8 (no BOM) with LF line endings
    # and exactly one trailing newline.
    #
    # To process all files instead of tracked ones only, build the list with
    # find_all_files('.').

    # tracked files only
    # '-z' makes git emit NUL-separated, unquoted paths; without it paths
    # containing non-ASCII characters are printed quoted/escaped and cannot
    # be opened as-is.
    completed_process = subprocess.run(
        ['git', 'ls-files', '-z'],
        stdout=PIPE,
        stderr=PIPE,
    )
    if completed_process.returncode != 0:
        raise SystemExit(
            completed_process.stderr.decode('utf-8', errors='replace')
        )

    tracked_paths = completed_process.stdout.decode('utf-8').split('\0')
    file_list = [path for path in tracked_paths if len(path) > 0]

    pbar = tqdm(file_list)
    for fpath in pbar:
        pbar.set_description(fpath)

        # Skips paths deleted from the working tree as well as directory
        # entries, which open() cannot read.
        if not os.path.isfile(fpath):
            continue

        basename = os.path.basename(fpath)
        ext = os.path.splitext(basename)[1].lower()
        if ext in skip_extensions:
            continue

        with open(fpath, mode='rb') as infile:
            bs = infile.read()
        encoding, decoded_string = Encoding.decode(bs)

        # Undecodable content is left untouched.
        if encoding is None:
            continue

        # enforce LF line ending
        content = decoded_string.replace('\r\n', '\n')
        content = content.strip('\n')

        # append empty line at the end
        # it's good practice for Git
        content = content + '\n'

        encoded_content = content.encode(Encoding.UTF8)
        # Already normalized: do not touch the file at all.
        if encoded_content == bs:
            continue

        # Overwrite in place rather than remove + recreate, so permission
        # bits are kept and a symlink is not replaced by a regular file.
        with open(fpath, mode='wb') as outfile:
            outfile.write(encoded_content)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Utility scripts for various things. To be used by `git submodule`. |