@@ -59,8 +59,8 @@ def load_meta_data(self):
5959 item_name = raw_item_name = piece_path [len (processed_data_dir )+ 1 :].replace ('/' , '-' )[:- len (wav_suffix )]
6060 if len (self .processed_data_dirs ) > 1 :
6161 item_name = f'ds{ ds_id } _{ item_name } '
62- self .item2txt [item_name ] = open (f'{ piece_path .replace (wav_suffix , txt_suffix )} ' ).readline ()
63- self .item2ph [item_name ] = open (f'{ piece_path .replace (wav_suffix , ph_suffix )} ' ).readline ()
62+ self .item2txt [item_name ] = open (f'{ piece_path .replace (wav_suffix , txt_suffix )} ' , encoding = 'utf-8' ).readline ()
63+ self .item2ph [item_name ] = open (f'{ piece_path .replace (wav_suffix , ph_suffix )} ' , encoding = 'utf-8' ).readline ()
6464 self .item2wavfn [item_name ] = piece_path
6565
6666 self .item2spk [item_name ] = re .split ('-|#' , piece_path .split ('/' )[- 2 ])[0 ]
@@ -106,10 +106,10 @@ def _phone_encoder(self):
106106 for ph_sent in self .item2ph .values ():
107107 ph_set += ph_sent .split (' ' )
108108 ph_set = sorted (set (ph_set ))
109- json .dump (ph_set , open (ph_set_fn , 'w' ))
109+ json .dump (ph_set , open (ph_set_fn , 'w' , encoding = 'utf-8' ))
110110 print ("| Build phone set: " , ph_set )
111111 else :
112- ph_set = json .load (open (ph_set_fn , 'r' ))
112+ ph_set = json .load (open (ph_set_fn , 'r' , encoding = 'utf-8' ))
113113 print ("| Load phone set: " , ph_set )
114114 return build_phone_encoder (hparams ['binary_data_dir' ])
115115
@@ -189,7 +189,7 @@ class MidiSingingBinarizer(SingingBinarizer):
189189
190190 def load_meta_data (self ):
191191 for ds_id , processed_data_dir in enumerate (self .processed_data_dirs ):
192- meta_midi = json .load (open (os .path .join (processed_data_dir , 'meta.json' ))) # [list of dict]
192+ meta_midi = json .load (open (os .path .join (processed_data_dir , 'meta.json' ), encoding = 'utf-8' )) # [list of dict]
193193
194194 for song_item in meta_midi :
195195 item_name = raw_item_name = song_item ['item_name' ]
@@ -303,7 +303,7 @@ def split_train_test_set(self, item_names):
303303 def load_meta_data (self ):
304304 raw_data_dir = hparams ['raw_data_dir' ]
305305 # meta_midi = json.load(open(os.path.join(raw_data_dir, 'meta.json'))) # [list of dict]
306- utterance_labels = open (os .path .join (raw_data_dir , 'transcriptions.txt' )).readlines ()
306+ utterance_labels = open (os .path .join (raw_data_dir , 'transcriptions.txt' ), encoding = 'utf-8' ).readlines ()
307307
308308 for utterance_label in utterance_labels :
309309 song_info = utterance_label .split ('|' )
0 commit comments