Skip to content

Commit e830f20

Browse files
committed
Set explicit UTF-8 encoding on all text-mode open() calls to support Windows
1 parent 40aa82b commit e830f20

8 files changed

Lines changed: 17 additions & 17 deletions

File tree

data_gen/singing/binarize.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ def load_meta_data(self):
5959
item_name = raw_item_name = piece_path[len(processed_data_dir)+1:].replace('/', '-')[:-len(wav_suffix)]
6060
if len(self.processed_data_dirs) > 1:
6161
item_name = f'ds{ds_id}_{item_name}'
62-
self.item2txt[item_name] = open(f'{piece_path.replace(wav_suffix, txt_suffix)}').readline()
63-
self.item2ph[item_name] = open(f'{piece_path.replace(wav_suffix, ph_suffix)}').readline()
62+
self.item2txt[item_name] = open(f'{piece_path.replace(wav_suffix, txt_suffix)}', encoding='utf-8').readline()
63+
self.item2ph[item_name] = open(f'{piece_path.replace(wav_suffix, ph_suffix)}', encoding='utf-8').readline()
6464
self.item2wavfn[item_name] = piece_path
6565

6666
self.item2spk[item_name] = re.split('-|#', piece_path.split('/')[-2])[0]
@@ -106,10 +106,10 @@ def _phone_encoder(self):
106106
for ph_sent in self.item2ph.values():
107107
ph_set += ph_sent.split(' ')
108108
ph_set = sorted(set(ph_set))
109-
json.dump(ph_set, open(ph_set_fn, 'w'))
109+
json.dump(ph_set, open(ph_set_fn, 'w', encoding='utf-8'))
110110
print("| Build phone set: ", ph_set)
111111
else:
112-
ph_set = json.load(open(ph_set_fn, 'r'))
112+
ph_set = json.load(open(ph_set_fn, 'r', encoding='utf-8'))
113113
print("| Load phone set: ", ph_set)
114114
return build_phone_encoder(hparams['binary_data_dir'])
115115

@@ -189,7 +189,7 @@ class MidiSingingBinarizer(SingingBinarizer):
189189

190190
def load_meta_data(self):
191191
for ds_id, processed_data_dir in enumerate(self.processed_data_dirs):
192-
meta_midi = json.load(open(os.path.join(processed_data_dir, 'meta.json'))) # [list of dict]
192+
meta_midi = json.load(open(os.path.join(processed_data_dir, 'meta.json'), encoding='utf-8')) # [list of dict]
193193

194194
for song_item in meta_midi:
195195
item_name = raw_item_name = song_item['item_name']
@@ -303,7 +303,7 @@ def split_train_test_set(self, item_names):
303303
def load_meta_data(self):
304304
raw_data_dir = hparams['raw_data_dir']
305305
# meta_midi = json.load(open(os.path.join(raw_data_dir, 'meta.json'))) # [list of dict]
306-
utterance_labels = open(os.path.join(raw_data_dir, 'transcriptions.txt')).readlines()
306+
utterance_labels = open(os.path.join(raw_data_dir, 'transcriptions.txt'), encoding='utf-8').readlines()
307307

308308
for utterance_label in utterance_labels:
309309
song_info = utterance_label.split('|')

data_gen/tts/base_binarizer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,11 @@ def _phone_encoder(self):
8585
ph_set = []
8686
if hparams['reset_phone_dict'] or not os.path.exists(ph_set_fn):
8787
for processed_data_dir in self.processed_data_dirs:
88-
ph_set += [x.split(' ')[0] for x in open(f'{processed_data_dir}/dict.txt').readlines()]
88+
ph_set += [x.split(' ')[0] for x in open(f'{processed_data_dir}/dict.txt', encoding='utf-8').readlines()]
8989
ph_set = sorted(set(ph_set))
90-
json.dump(ph_set, open(ph_set_fn, 'w'))
90+
json.dump(ph_set, open(ph_set_fn, 'w', encoding='utf-8'))
9191
else:
92-
ph_set = json.load(open(ph_set_fn, 'r'))
92+
ph_set = json.load(open(ph_set_fn, 'r', encoding='utf-8'))
9393
print("| phone set: ", ph_set)
9494
return build_phone_encoder(hparams['binary_data_dir'])
9595

@@ -113,7 +113,7 @@ def process(self):
113113
self.spk_map = self.build_spk_map()
114114
print("| spk_map: ", self.spk_map)
115115
spk_map_fn = f"{hparams['binary_data_dir']}/spk_map.json"
116-
json.dump(self.spk_map, open(spk_map_fn, 'w'))
116+
json.dump(self.spk_map, open(spk_map_fn, 'w', encoding='utf-8'))
117117

118118
self.phone_encoder = self._phone_encoder()
119119
self.process_data('valid')

data_gen/tts/data_gen_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ def toJson(self):
273273

274274
def get_mel2ph(tg_fn, ph, mel, hparams):
275275
ph_list = ph.split(" ")
276-
with open(tg_fn, "r") as f:
276+
with open(tg_fn, "r", encoding='utf-8') as f:
277277
tg = f.readlines()
278278
tg = remove_empty_lines(tg)
279279
tg = TextGrid(tg)
@@ -339,7 +339,7 @@ def get_mel2ph(tg_fn, ph, mel, hparams):
339339

340340
def build_phone_encoder(data_dir):
341341
phone_list_file = os.path.join(data_dir, 'phone_set.json')
342-
phone_list = json.load(open(phone_list_file))
342+
phone_list = json.load(open(phone_list_file, encoding='utf-8'))
343343
return TokenTextEncoder(None, vocab_list=phone_list, replace_oov=',')
344344

345345

inference/svs/opencpop/map.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
def cpop_pinyin2ph_func():
22
# In the README file of opencpop dataset, they defined a "pinyin to phoneme mapping table"
33
pinyin2phs = {'AP': 'AP', 'SP': 'SP'}
4-
with open('inference/svs/opencpop/cpop_pinyin2ph.txt') as rf:
4+
with open('inference/svs/opencpop/cpop_pinyin2ph.txt', encoding='utf-8') as rf:
55
for line in rf.readlines():
66
elements = [x.strip() for x in line.split('|') if x.strip() != '']
77
pinyin2phs[elements[0]] = elements[1]

modules/parallel_wavegan/utils/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def __init__(self, feats_scp, default_hdf5_path="feats"):
137137
138138
"""
139139
self.default_hdf5_path = default_hdf5_path
140-
with open(feats_scp) as f:
140+
with open(feats_scp, encoding='utf-8') as f:
141141
lines = [line.replace("\n", "") for line in f.readlines()]
142142
self.data = {}
143143
for line in lines:

tasks/tts/tts.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def shuffle_batches(batches):
9595
def build_phone_encoder(self, data_dir):
9696
phone_list_file = os.path.join(data_dir, 'phone_set.json')
9797

98-
phone_list = json.load(open(phone_list_file))
98+
phone_list = json.load(open(phone_list_file, encoding='utf-8'))
9999
return TokenTextEncoder(None, vocab_list=phone_list, replace_oov=',')
100100

101101
def build_optimizer(self, model):

utils/hparams.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ def load_config(config_fn): # deep first
9696

9797
if args_work_dir != '' and (not os.path.exists(ckpt_config_path) or args.reset) and not args.infer:
9898
os.makedirs(hparams_['work_dir'], exist_ok=True)
99-
with open(ckpt_config_path, 'w') as f:
99+
with open(ckpt_config_path, 'w', encoding='utf-8') as f:
100100
yaml.safe_dump(hparams_, f)
101101

102102
hparams_['infer'] = args.infer

vocoders/hifigan.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def load_model(config_path, checkpoint_path):
2121
config = set_hparams(config_path, global_hparams=False)
2222
state = ckpt_dict["state_dict"]["model_gen"]
2323
elif '.json' in config_path:
24-
config = json.load(open(config_path, 'r'))
24+
config = json.load(open(config_path, 'r', encoding='utf-8'))
2525
state = ckpt_dict["generator"]
2626

2727
model = HifiGanGenerator(config)

0 commit comments

Comments
 (0)