Skip to content

Commit

Permalink
🎉 init commit
Browse files Browse the repository at this point in the history
- add lf-utf8.py to enforce UTF-8 encoding, LF line ending,
and new line at the end of the text file
- add clang-format.py to run clang-format on every single
C/C++ text file
  • Loading branch information
ichisadashioko committed Jul 30, 2020
0 parents commit f524bce
Show file tree
Hide file tree
Showing 4 changed files with 331 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* text=auto eol=lf
163 changes: 163 additions & 0 deletions clang-format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
#!/usr/bin/env python3
# encoding=utf-8

import os
import subprocess
import posixpath
import threading
from typing import List


class Encoding:
    """Names of the candidate text encodings and a best-effort decoder."""

    UTF8 = 'utf-8'
    UTF8_WITH_BOM = 'utf-8-sig'
    UTF16 = 'utf-16'
    GB2312 = 'gb2312'
    SHIFT_JIS = 'shift-jis'

    @classmethod
    def decode(cls, bs: bytes):
        """Decode *bs* with the first candidate encoding that succeeds.

        Candidates are tried in order: UTF-8 (reported as ``UTF8_WITH_BOM``
        only when the data really starts with a byte order mark), UTF-16,
        GB2312 and Shift-JIS.

        Returns:
            ``(encoding_name, decoded_text)`` on success, or ``(None, bs)``
            when no candidate could decode the input.
        """
        # Anything that is not a bytes-like object cannot be decoded; report
        # it as "unknown encoding" instead of raising.
        if not isinstance(bs, (bytes, bytearray)):
            return None, bs

        # 'utf-8-sig' also accepts data without a BOM, so trying it
        # unconditionally would label every UTF-8 file as 'utf-8-sig'.
        # Check for the raw UTF-8 byte order mark explicitly instead.
        if bs.startswith(b'\xef\xbb\xbf'):
            unicode_candidate = cls.UTF8_WITH_BOM
        else:
            unicode_candidate = cls.UTF8

        # NOTE(review): the legacy codecs are permissive, so arbitrary
        # non-UTF-8 bytes may still "decode" as one of them - confirm that
        # this guessing order is acceptable for the files being processed.
        candidates = (unicode_candidate, cls.UTF16, cls.GB2312, cls.SHIFT_JIS)

        for encoding in candidates:
            try:
                return encoding, bs.decode(encoding)
            except UnicodeError:
                continue

        return None, bs


# Lower-case file extensions that are treated as C/C++ sources.
C_SOURCE_EXTENSIONS = ['.h', '.c', '.cc']


def is_c_source_file(path: str):
    """Return True when *path* ends with an extension in C_SOURCE_EXTENSIONS.

    The extension is lower-cased before the lookup, so the check is
    case-insensitive.
    """
    suffix = posixpath.splitext(path)[1]
    return suffix.lower() in C_SOURCE_EXTENSIONS


def convert_string(bytes_input: bytes):
    """Turn raw bytes into text using ``Encoding.decode``.

    Returns the decoded text when one of the candidate encodings matches.
    Otherwise returns ``str(bytes_input)``, i.e. the ``b'...'`` repr of the
    data, so the caller always receives a ``str``.
    """
    encoding, decoded = Encoding.decode(bytes_input)

    # Compare against the None singleton by identity, not equality.
    if encoding is None:
        return str(bytes_input)

    return decoded


class Command:
def __init__(self, cmd: List[str]):
self.cmd = cmd

# type is hinted implicitly (subprocess.Popen)
self.p = None

# The process is terminated by us because it took too long.
# If this flag is True then the output is broken.
self.terminated = False
self.stdout = None
self.stderr = None

def target(self):
print('>', ' '.join(self.cmd))
self.process = subprocess.Popen(
self.cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)

self.stdout, self.stderr = self.process.communicate()

def run(self, timeout=5, raise_on_error=True):
thread = threading.Thread(target=self.target)
thread.start()
thread.join(timeout)

if thread.is_alive():
self.terminated = True
self.process.terminate()
# TODO Will call block our main thread for a long time?
thread.join()

if raise_on_error:
if self.terminated:
raise Exception(
f'The process is terminated because it took too long to excute!\n'
f'{convert_string(self.process.stderr)}'
)

if self.process.returncode != 0:
raise Exception(f'')

# if self.stdout is not None:
# output = convert_string(self.stdout)
# print(output)


if __name__ == '__main__':
    # Format every tracked C/C++ source file in place with clang-format.
    completed_process = subprocess.run(
        ['git', 'ls-files'],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    lines = completed_process.stdout.decode('utf-8').split('\n')

    # Drop empty lines and keep only the C/C++ sources.
    filepaths = [line for line in lines if len(line) > 0 and is_c_source_file(line)]

    print(filepaths)
    for filepath in filepaths:
        with open(filepath, mode='rb') as infile:
            # Encoding.decode returns (encoding, text); only the text is
            # comparable with the clang-format output below.
            _, file_content = Encoding.decode(infile.read())

        cmd = ['clang-format', '-style=file', filepath]
        sp = Command(cmd)
        sp.run()
        if sp.stdout is None:
            continue

        clang_formatted_content = convert_string(sp.stdout)
        # TODO add 'check' or 'format' flags

        # Rewrite the file only when clang-format actually changed something.
        if file_content != clang_formatted_content:
            utf8_encoded_content = clang_formatted_content.encode('utf-8')
            with open(filepath, mode='wb') as outfile:
                outfile.write(utf8_encoded_content)
166 changes: 166 additions & 0 deletions lf-utf8.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
#!/usr/bin/env python3
# encoding=utf-8
import os
import mimetypes
import traceback
import subprocess
from subprocess import PIPE

from tqdm import tqdm


class Encoding:
    """Names of the candidate text encodings and a best-effort decoder."""

    UTF8 = 'utf-8'
    UTF8_WITH_BOM = 'utf-8-sig'
    UTF16 = 'utf-16'
    GB2312 = 'gb2312'
    SHIFT_JIS = 'shift-jis'

    @classmethod
    def decode(cls, bs: bytes):
        """Decode *bs* with the first candidate encoding that succeeds.

        Candidates are tried in order: UTF-8 (reported as ``UTF8_WITH_BOM``
        only when the data really starts with a byte order mark), UTF-16,
        GB2312 and Shift-JIS.

        Returns:
            ``(encoding_name, decoded_text)`` on success, or ``(None, bs)``
            when no candidate could decode the input.
        """
        # Anything that is not a bytes-like object cannot be decoded; report
        # it as "unknown encoding" instead of raising.
        if not isinstance(bs, (bytes, bytearray)):
            return None, bs

        # 'utf-8-sig' also accepts data without a BOM, so trying it
        # unconditionally would label every UTF-8 file as 'utf-8-sig'.
        # Check for the raw UTF-8 byte order mark explicitly instead.
        if bs.startswith(b'\xef\xbb\xbf'):
            unicode_candidate = cls.UTF8_WITH_BOM
        else:
            unicode_candidate = cls.UTF8

        # NOTE(review): the legacy codecs are permissive, so arbitrary
        # non-UTF-8 bytes may still "decode" as one of them - confirm that
        # this guessing order is acceptable for the files being processed.
        candidates = (unicode_candidate, cls.UTF16, cls.GB2312, cls.SHIFT_JIS)

        for encoding in candidates:
            try:
                return encoding, bs.decode(encoding)
            except UnicodeError:
                continue

        return None, bs


# Base names that find_all_files() refuses to return or descend into.
skips = [
    # version control and log directories
    '.git',
    'logs',
    # Visual Studio project migration files
    'Backup',
    # known Visual Studio build/cache directories
    'bin',
    'obj',
    '.vs',
]

# Lower-case file extensions that are never read or rewritten.
skip_extensions = [
    '.bomb',
    '.map',
    '.xls',  # Microsoft Excel files
    # known binary extensions
    '.dll',
    '.jpg',
    '.gif',
    '.png',
    '.suo',  # weird files from Visual Studio
]


def find_all_files(infile):
    """Recursively collect the file paths found at or below *infile*.

    Entries whose base name is listed in ``skips`` and files whose
    lower-cased extension is listed in ``skip_extensions`` are left out.
    Returns a list of paths; it is empty when *infile* is skipped or is
    neither a file nor a directory.
    """
    if os.path.basename(infile) in skips:
        return []

    if os.path.isfile(infile):
        suffix = os.path.splitext(infile)[1].lower()
        return [] if suffix in skip_extensions else [infile]

    collected = []
    if os.path.isdir(infile):
        for entry in os.listdir(infile):
            collected.extend(find_all_files(os.path.join(infile, entry)))

    return collected


def check_clang_format():
    """Placeholder that currently does nothing and returns None."""
    return None


if __name__ == '__main__':
    # Re-encode every tracked text file as UTF-8 with LF line endings and
    # exactly one newline at the end of the file.

    # all files
    # file_list = find_all_files('.')

    # tracked files only
    completed_process = subprocess.run(
        ['git', 'ls-files'],
        stdout=PIPE,
        stderr=PIPE,
    )

    lines = completed_process.stdout.decode('utf-8').split('\n')

    file_list = [line for line in lines if len(line) > 0]

    pbar = tqdm(file_list)
    for fpath in pbar:
        pbar.set_description(fpath)

        # Skip entries that are missing from the working tree or that are
        # not regular files (a directory cannot be opened for reading).
        if not os.path.isfile(fpath):
            continue

        basename = os.path.basename(fpath)
        ext = os.path.splitext(basename)[1].lower()
        if ext in skip_extensions:
            continue

        with open(fpath, mode='rb') as infile:
            bs = infile.read()
        encoding, decoded_string = Encoding.decode(bs)

        # Unknown encoding: leave the file untouched.
        if encoding is None:
            continue

        # enforce LF line ending
        content = decoded_string.replace('\r\n', '\n')
        content = content.strip('\n')

        # append empty line at the end
        # it's good practice for Git
        content = content + '\n'

        encoded_content = content.encode(Encoding.UTF8)

        # Nothing to do when the file is already normalised; avoids deleting
        # and recreating files whose bytes would not change.
        if encoded_content == bs:
            continue

        os.remove(fpath)  # file will not be changed if we don't remove it
        with open(fpath, mode='wb') as outfile:
            outfile.write(encoded_content)
1 change: 1 addition & 0 deletions readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Utility scripts for various things. To be used by `git submodule`.

0 comments on commit f524bce

Please sign in to comment.