Generating XML output

This commit is contained in:
Daniel Wolf 2015-11-25 22:00:24 +01:00
parent 2ef99119b0
commit 27ba3ef357
5 changed files with 58 additions and 14 deletions

View File

@ -7,6 +7,7 @@
// For reference, see http://sunewatts.dk/lipsync/lipsync/article_02.php // For reference, see http://sunewatts.dk/lipsync/lipsync/article_02.php
// For visual examples, see https://flic.kr/s/aHsj86KR4J. Their shapes "BMP".."L" map to A..H. // For visual examples, see https://flic.kr/s/aHsj86KR4J. Their shapes "BMP".."L" map to A..H.
enum class Shape { enum class Shape {
Invalid = -1,
A, // Closed mouth (silence, M, B, P) A, // Closed mouth (silence, M, B, P)
B, // Clenched teeth (most vowels, m[e]n) B, // Clenched teeth (most vowels, m[e]n)
C, // Mouth slightly open (b[ir]d, s[ay], w[i]n...) C, // Mouth slightly open (b[ir]d, s[ay], w[i]n...)

View File

@ -1,6 +1,8 @@
#ifndef LIPSYNC_CENTISECONDS_H #ifndef LIPSYNC_CENTISECONDS_H
#define LIPSYNC_CENTISECONDS_H #define LIPSYNC_CENTISECONDS_H
#include <chrono>
typedef std::chrono::duration<int, std::centi> centiseconds; typedef std::chrono::duration<int, std::centi> centiseconds;
std::ostream& operator <<(std::ostream& stream, const centiseconds cs); std::ostream& operator <<(std::ostream& stream, const centiseconds cs);

View File

@ -1,4 +1,7 @@
#include <iostream> #include <iostream>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/xml_parser.hpp>
#include <format.h>
#include "audio_input/WaveFileReader.h" #include "audio_input/WaveFileReader.h"
#include "phone_extraction.h" #include "phone_extraction.h"
#include "mouth_animation.h" #include "mouth_animation.h"
@ -8,6 +11,11 @@ using std::exception;
using std::string; using std::string;
using std::wstring; using std::wstring;
using std::unique_ptr; using std::unique_ptr;
using std::map;
using std::chrono::duration;
using std::chrono::duration_cast;
using boost::filesystem::path;
using boost::property_tree::ptree;
string getMessage(const exception& e) { string getMessage(const exception& e) {
string result(e.what()); string result(e.what());
@ -20,7 +28,7 @@ string getMessage(const exception& e) {
return result; return result;
} }
unique_ptr<AudioStream> createAudioStream(boost::filesystem::path filePath) { unique_ptr<AudioStream> createAudioStream(path filePath) {
try { try {
return unique_ptr<AudioStream>(new WaveFileReader(filePath)); return unique_ptr<AudioStream>(new WaveFileReader(filePath));
} catch (...) { } catch (...) {
@ -28,6 +36,37 @@ unique_ptr<AudioStream> createAudioStream(boost::filesystem::path filePath) {
} }
} }
// Renders a duration as a number of seconds with exactly two decimal places
// (for instance, one and a half seconds becomes "1.50").
string formatDuration(duration<double> seconds) {
    const double secondCount = seconds.count();
    return fmt::format("{0:.2f}", secondCount);
}
ptree createXmlTree(const path& filePath, const map<centiseconds, Phone>& phones, const map<centiseconds, Shape>& shapes) {
ptree tree;
// Add sound file path
tree.add("rhubarbResult.info.soundFile", filePath.string());
// Add phones
for (auto it = phones.cbegin(), itNext = ++phones.cbegin(); itNext != phones.cend(); ++it, ++itNext) {
auto pair = *it;
auto nextPair = *itNext;
ptree& phoneElement = tree.add("rhubarbResult.phones.phone", pair.second);
phoneElement.add("<xmlattr>.start", formatDuration(pair.first));
phoneElement.add("<xmlattr>.duration", formatDuration(nextPair.first - pair.first));
}
// Add mouth cues
for (auto it = shapes.cbegin(), itNext = ++shapes.cbegin(); itNext != shapes.cend(); ++it, ++itNext) {
auto pair = *it;
auto nextPair = *itNext;
ptree& mouthCueElement = tree.add("rhubarbResult.mouthCues.mouthCue", pair.second);
mouthCueElement.add("<xmlattr>.start", formatDuration(pair.first));
mouthCueElement.add("<xmlattr>.duration", formatDuration(nextPair.first - pair.first));
}
return tree;
}
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
try { try {
// Get sound file name // Get sound file name
@ -41,21 +80,18 @@ int main(int argc, char *argv[]) {
unique_ptr<AudioStream> audioStream = createAudioStream(soundFileName); unique_ptr<AudioStream> audioStream = createAudioStream(soundFileName);
// Detect phones // Detect phones
std::map<centiseconds, Phone> phones = detectPhones(std::move(audioStream)); map<centiseconds, Phone> phones = detectPhones(std::move(audioStream));
// Generate mouth shapes // Generate mouth shapes
std::map<centiseconds, Shape> shapes = animate(phones); map<centiseconds, Shape> shapes = animate(phones);
for (auto &pair : phones) { // Print XML
std::cout << pair.first << ": " << pair.second << "\n"; boost::property_tree::ptree xmlTree = createXmlTree(soundFileName, phones, shapes);
} boost::property_tree::write_xml(std::cout, xmlTree, boost::property_tree::xml_writer_settings<string>(' ', 2));
for (auto &pair : shapes) {
std::cout << pair.first << ": " << pair.second << "\n";
}
return 0; return 0;
} catch (const exception& e) { } catch (const exception& e) {
std::cout << "An error occurred. " << getMessage(e); std::cerr << "An error occurred. " << getMessage(e);
return 1; return 1;
} }
} }

View File

@ -68,8 +68,13 @@ Shape getShape(Phone phone) {
map<centiseconds, Shape> animate(const map<centiseconds, Phone> &phones) { map<centiseconds, Shape> animate(const map<centiseconds, Phone> &phones) {
map<centiseconds, Shape> shapes; map<centiseconds, Shape> shapes;
Shape lastShape = Shape::Invalid;
for (auto& pair : phones) { for (auto& pair : phones) {
shapes[pair.first] = getShape(pair.second); Shape shape = getShape(pair.second);
if (shape != lastShape) {
shapes[pair.first] = shape;
lastShape = shape;
}
} }
return shapes; return shapes;
} }

View File

@ -105,6 +105,7 @@ void processAudioStream(AudioStream& audioStream16kHzMono, ps_decoder_t& recogni
map<centiseconds, Phone> getPhones(ps_decoder_t& recognizer) { map<centiseconds, Phone> getPhones(ps_decoder_t& recognizer) {
map<centiseconds, Phone> result; map<centiseconds, Phone> result;
ps_seg_t *segmentationIter; ps_seg_t *segmentationIter;
result[centiseconds(0)] = Phone::None;
int32 score; int32 score;
int endFrame; int endFrame;
for (segmentationIter = ps_seg_iter(&recognizer, &score); segmentationIter; segmentationIter = ps_seg_next(segmentationIter)) { for (segmentationIter = ps_seg_iter(&recognizer, &score); segmentationIter; segmentationIter = ps_seg_next(segmentationIter)) {
@ -116,9 +117,8 @@ map<centiseconds, Phone> getPhones(ps_decoder_t& recognizer) {
ps_seg_frames(segmentationIter, &startFrame, &endFrame); ps_seg_frames(segmentationIter, &startFrame, &endFrame);
result[centiseconds(startFrame)] = stringToPhone(phone); result[centiseconds(startFrame)] = stringToPhone(phone);
result[centiseconds(endFrame + 1)] = Phone::None;
} }
// Add dummy entry past the last phone
result[centiseconds(endFrame + 1)] = Phone::None;
return result; return result;
}; };