摘要
上一篇文章我们实现了整数与时间格式的互转。常见的字幕文件格式有 WebVTT、SRT 和 TTML:有的系统要求我们提供 VTT 格式,有的系统只支持 TTML 格式,而我们做完字幕后拿到的可能是 SRT 格式,所以会涉及到在不同格式的字幕文件之间进行转换。
本文介绍的示例代码实现了VTT 与SRT互转, 也可以将VTT或SRT转化到TTML。
同样, 匹配时间格式的正则表达式是:
"([0-9]+:)?([0-9]+):([0-9]+)([.|,][0-9]+)? --> ([0-9]+:)?([0-9]+):([0-9]+)([.|,][0-9]+)?"
字幕格式对象是:
/// <summary>
/// A single subtitle cue: a start timestamp, an end timestamp and the cue text.
/// </summary>
class ClosedCaption
{
    /// <summary>Cue start timestamp, stored as text.</summary>
    public string StartPoint { get; set; }

    /// <summary>Cue end timestamp, stored as text.</summary>
    public string EndPoint { get; set; }

    /// <summary>Text displayed while the cue is active.</summary>
    public string Transcript { get; set; }

    /// <summary>
    /// Renders the cue as a timing line ("start --&gt; end") followed by the
    /// transcript line. Both lines are terminated with a line break.
    /// </summary>
    public override string ToString()
    {
        return new StringBuilder()
            .AppendFormat("{0} --> {1}", StartPoint, EndPoint)
            .AppendLine()
            .AppendLine(Transcript)
            .ToString();
    }
}
从文件中读取字幕格式对象:
/// <summary>
/// Reads a subtitle file and rebuilds <c>Captions</c> from the cues found in it.
/// </summary>
/// <param name="filePath">Path of the subtitle file to read.</param>
/// <remarks>
/// Every timing line ("start --&gt; end") becomes one <c>ClosedCaption</c>. The text
/// between one timing line and the next (or the end of the file) becomes its transcript.
/// </remarks>
public static void ReadTranscript(string filePath)
{
    // Example of a timing line this pattern matches:
    //   0:0:4.480 --> 0:0:7.430
    // The hours part is optional and the fraction is optional.
    // Note: '|' inside [.|,] is a literal character, so '.', '|' and ',' are all accepted.
    string timePattern = @"([0-9]+:)?([0-9]+):([0-9]+)([.|,][0-9]+)? --> ([0-9]+:)?([0-9]+):([0-9]+)([.|,][0-9]+)?";

    // File.ReadAllText opens the file for reading only. The previous
    // FileStream(path, FileMode.Open) asked for read/write access and
    // therefore failed on read-only files.
    string fileContent = File.ReadAllText(filePath);

    MatchCollection cues = Regex.Matches(fileContent, timePattern);
    Captions = new List<ClosedCaption>();

    for (int i = 0; i < cues.Count; i++)
    {
        Match cue = cues[i];

        // The pattern guarantees exactly one "-->" with a timestamp on each side.
        string[] timeInfo = cue.Value.Split(new string[] { "-->" }, StringSplitOptions.RemoveEmptyEntries);
        string startInfo = TimeFormat.ToHHMMSS(TimeFormat.ToDouble(timeInfo[0].Trim()), "t1");
        string endInfo = TimeFormat.ToHHMMSS(TimeFormat.ToDouble(timeInfo[1].Trim()), "t1");

        // Slice the transcript by match position instead of replace-and-split.
        // This keeps cues and texts aligned even when a cue has no text or the
        // file starts directly with a timing line.
        int textStart = cue.Index + cue.Length;
        int textEnd = (i + 1 < cues.Count) ? cues[i + 1].Index : fileContent.Length;
        string rawTranscript = fileContent.Substring(textStart, textEnd - textStart);

        Captions.Add(new ClosedCaption
        {
            StartPoint = startInfo,
            EndPoint = endInfo,
            Transcript = StripTrailingCueNumber(rawTranscript)
        });
    }
}

// Removes the sequence number of the NEXT cue (as written in SRT files) from the end of a cue's text.
private static string StripTrailingCueNumber(string rawTranscript)
{
    string text = rawTranscript.Trim();

    // Walk back over the trailing run of ASCII digits; the bound check keeps
    // the index in range even when the whole text consists of digits.
    int digitStart = text.Length;
    while (digitStart > 0 && text[digitStart - 1] >= '0' && text[digitStart - 1] <= '9')
    {
        digitStart--;
    }

    // Only treat the digits as a cue number when they sit on a line of their
    // own; digits that end a subtitle line ("I am 25") are real content.
    // Limitation: a last subtitle line made only of digits is still removed.
    bool hasDigits = digitStart < text.Length;
    bool onOwnLine = digitStart > 0 && (text[digitStart - 1] == '\n' || text[digitStart - 1] == '\r');
    if (hasDigits && onOwnLine)
    {
        text = text.Remove(digitStart);
    }

    return text.Trim();
}
由字幕对象生成VTT, SRT, 和TTML:
/// <summary>
/// Writes the current <c>Captions</c> to a WebVTT file. Does nothing when there are no captions.
/// </summary>
/// <param name="vtt">Path of the output file; an existing file is overwritten.</param>
public static void Write2VTT(string vtt)
{
    if (Captions == null || Captions.Count == 0)
    {
        return;
    }

    StringBuilder sb = new StringBuilder();
    sb.AppendLine("WEBVTT");
    sb.AppendLine();
    foreach (var item in Captions)
    {
        // item.ToString() already ends with a line break, so AppendLine
        // adds the blank line that separates cues.
        sb.AppendLine(item.ToString());
    }

    // Disposing the writer flushes and closes it; no explicit Flush/Close needed.
    using (StreamWriter writer = new StreamWriter(vtt, false))
    {
        writer.Write(sb.ToString());
    }
}

/// <summary>
/// Writes the current <c>Captions</c> to an SRT file, numbering cues from 1.
/// Does nothing when there are no captions.
/// </summary>
/// <param name="srt">Path of the output file; an existing file is overwritten.</param>
public static void Write2SRT(string srt)
{
    if (Captions == null || Captions.Count == 0)
    {
        return;
    }

    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < Captions.Count; i++)
    {
        sb.AppendLine((i + 1).ToString());
        // ToString() ends with a line break, so AppendLine adds the blank separator line.
        // NOTE(review): timestamps are written exactly as stored in the caption;
        // confirm the stored format uses the separator SRT players expect.
        sb.AppendLine(Captions[i].ToString());
    }

    using (StreamWriter writer = new StreamWriter(srt))
    {
        writer.Write(sb.ToString());
    }
}

/// <summary>
/// Writes the current <c>Captions</c> to a TTML file by filling the template
/// "ttSample1.txt". Does nothing when there are no captions.
/// </summary>
/// <param name="ttml">Path of the output file; an existing file is overwritten.</param>
/// <remarks>
/// The template is used as a composite format string: "{0}" is replaced by the
/// generated &lt;div&gt; element, so any literal braces in the template must be doubled.
/// The template path is relative, so it is resolved against the current directory.
/// </remarks>
public static void Write2TTML(string ttml)
{
    if (Captions == null || Captions.Count == 0)
    {
        return;
    }

    // Read the template only once we know there is something to write.
    string template;
    using (StreamReader sr = new StreamReader("ttSample1.txt"))
    {
        template = sr.ReadToEnd();
    }

    StringBuilder sbContent = new StringBuilder();
    sbContent.AppendLine("<div region=\"subtitleArea\">");
    for (int i = 0; i < Captions.Count; i++)
    {
        // Re-normalize the timestamps through TimeFormat with the "t1" format.
        double beginTime = TimeFormat.ToDouble(Captions[i].StartPoint);
        double endTime = TimeFormat.ToDouble(Captions[i].EndPoint);
        string begin = TimeFormat.ToHHMMSS(beginTime, "t1");
        string end = TimeFormat.ToHHMMSS(endTime, "t1");

        // Encode the text so characters such as '<' and '&' cannot break the markup.
        string content = HttpUtility.HtmlEncode(Captions[i].Transcript);
        sbContent.AppendLine(string.Format("<p begin=\"{1}\" id=\"{0}\" end=\"{2}\">{3}</p>", "p" + i, begin, end, content));
    }
    sbContent.AppendLine("</div>");

    string output = string.Format(template, sbContent.ToString());
    using (StreamWriter writer = new StreamWriter(ttml))
    {
        writer.Write(output);
    }
}
转换示例:见 GitHub 上的 CCConverter 项目。