# Patch for split-csv.sh: shuffle after the audio/text list is built, not before.
#
# Before: metadata.csv was shuffled into metadata-shuf.csv, field 2 was cut out
# into metadata-shuf-2.csv, and the wavs/ID.wav|TEXT list was written to
# ljs_audio_text.txt in shuffled order.
# After: field 2 is cut straight from metadata.csv into metadata-cut.csv, the
# list in ljs_audio_text.txt keeps the input order, and a shuffled copy,
# ljs_audio_text-shuf.txt, feeds the three splits.
# Split boundaries are unchanged: lines 1-50 go to val, 51-200 to test, and
# 201 to the end go to train_v3. The pitch-list awk commands are context only.
#
# NOTE(review): the hunk header declares 11 lines per side but only 10 were
# visible in the collapsed text, so one blank context line has been restored
# after the shebang. Confirm against the real split-csv.sh before applying.
diff --git a/split-csv.sh b/split-csv.sh
index 00e4222..b29f3b4 100755
--- a/split-csv.sh
+++ b/split-csv.sh
@@ -1,11 +1,11 @@
 #!/bin/bash
 
-shuf metadata.csv > metadata-shuf.csv
-cut --complement -f 2 -d\| metadata-shuf.csv > metadata-shuf-2.csv
-awk -F '|' -v OFS='|' '$1 {print "wavs/" $1 ".wav" "|" $2}' metadata-shuf-2.csv > ljs_audio_text.txt
-sed -n '1,50p' ljs_audio_text.txt > ljs_audio_text_val.txt
-sed -n '51,200p' ljs_audio_text.txt > ljs_audio_text_test.txt
-sed -n '201,$ p' ljs_audio_text.txt > ljs_audio_text_train_v3.txt
+cut --complement -f 2 -d\| metadata.csv > metadata-cut.csv
+awk -F '|' -v OFS='|' '$1 {print "wavs/" $1 ".wav" "|" $2}' metadata-cut.csv > ljs_audio_text.txt
+shuf ljs_audio_text.txt > ljs_audio_text-shuf.txt
+sed -n '1,50p' ljs_audio_text-shuf.txt > ljs_audio_text_val.txt
+sed -n '51,200p' ljs_audio_text-shuf.txt > ljs_audio_text_test.txt
+sed -n '201,$ p' ljs_audio_text-shuf.txt > ljs_audio_text_train_v3.txt
 awk -F '|' -v OFS='|' '$1 {print $1 "|" "pitch/" substr($1,6) ".pt" "|" $2}' ljs_audio_text_val.txt > ljs_audio_pitch_text_val.txt
 awk -F '|' -v OFS='|' '$1 {print $1 "|" "pitch/" substr($1,6) ".pt" "|" $2}' ljs_audio_text_test.txt > ljs_audio_pitch_text_test.txt
 awk -F '|' -v OFS='|' '$1 {print $1 "|" "pitch/" substr($1,6) ".pt" "|" $2}' ljs_audio_text_train_v3.txt > ljs_audio_pitch_text_train_v3.txt