ESP8266とTwilio音声認識でヘボコン・ロボット用IoTモジュール作成

先に作ったヘボコン・ロボット用IoTモジュールを改良しました。

今回はTwilio SIGNAL 2017で発表された新機能、Speech Recognition(音声認識)とFunctionsを使ってみました。

仕様

ロボットが攻撃を受けたら自動的に操縦者に電話をかけ、音声合成で報告し、操縦者の音声による指示を認識して、ロボットのサーボモータを動かします。

デモ動画

システム

f:id:tsun226:20170625215130p:plain

ESP-WROOM-02 … WiFi機能を持ったArduino互換CPU(スイッチサイエンス製ESPr Developerを使用)

R … ルータ(iPhoneのテザリングを利用)

Twilio … (REST APIによりFunctionsを起動)

Web Server … Dockerコンテナ上にnginx,php-cgi,twilio-php-master環境を用意(Twilioから音声認識された文字情報を受け取り保存し、またESP-WROOM-02にこの文字情報から生成したサーボモータ制御情報を渡す)

ロボットモジュール配線図

f:id:tsun226:20170625215148p:plain

動作

ロボットのスイッチに敵ロボットが当たる。
ESP-WROOM-02がWiFi→ルータ→インターネット経由でTwilio REST APIに接続し、架電のリクエストをする。
TwilioはFunctionsに登録されている指示に従い、操縦者に架電し音声合成によりメッセージを伝え、操縦者の音声を認識してWeb Serverに文字情報として伝える。
Web ServerはTwilioから受け取った文字情報を保存する。
ESP-WROOM-02は定期的にWeb Serverをポーリングし、4.で保存された文字情報から生成されたサーボモータ制御用情報(角度を表す数値)を取得し、サーボモータの角度をコントロールする。

ESP-WROOM-02スケッチ

定期的にWeb Serverをポーリングし、取得した値をサーボモータに出力します。

スイッチがONになるとTwilio REST APIに架電リクエストをPOSTします。

#ifdef ESP8266
extern "C" {
#include "user_interface.h"
}
#endif

#include <ESP8266WiFi.h>
#include <WiFiClientSecure.h>
#include <Servo.h>

ADC_MODE(ADC_VCC);

// A single TLS client is shared by the Twilio call request and the Web Server polling.
WiFiClientSecure client;
Servo servo;

// NOTE: every {...} value below is a placeholder that must be replaced with a
// real value before the sketch is built. All string literals use plain ASCII
// double quotes; typographic quotes are not valid C++ string delimiters.

// Twilio Call parameters
const char* asid = "{Twilio ASID}";                                        // Twilio account SID (inserted into the request path)
const char* authorization = "Basic {TwilioのBASIC認証データ(BASE64)}";      // value of the Authorization header sent to Twilio
const char* twimlUrl = "{TwilioのFunctionsのURL}";                          // sent as the "Url" form field
const char* to = "{宛先電話番号}";                                          // sent as the "To" form field
const char* from = "{発信者電話番号(Twilio側電話番号)}}";                   // sent as the "From" form field

// HeboCall parameters
const char* hebocallAuthorization = "Basic {Web ServerのBASIC認証データ(BASE64)}";  // Authorization header sent to the Web Server
const char* hebocallUrl = "{Web Serverサーボモータ制御用情報取得URL}";               // not referenced by the code in this sketch

// Wi-Fi parameters
const char* ssid = "{ルータのSSID}";
const char* pass = "{ルータのパスワード}";

// Twilio REST API endpoint
const char* twilioHost = "api.twilio.com";
const char* twilioPath0 = "/2010-04-01/Accounts/";
const char* twilioPath1 = "/Calls.json";

// HeboCall endpoint
const char* hebocallHost = "{Web ServerホストのFQDN}";
const int hebocallPort = {Web Serverのポート番号};  // placeholder: replace with the integer port number
const char* hebocallPath = "{Web Serverのサーボモータ制御用情報取得ファイルパス}";

// Pin numbers used with pinMode()/digitalRead()/digitalWrite()/servo.attach().
#define IO_SWITCH 14
#define IO_LED0 13
#define IO_LED1 12
#define IO_SERVO 16

// Pre-built HTTP requests, filled in once by makeContent() / makeHebocallContent().
String content;
String hebocallContent;

// One-time initialisation: serial console, I/O pins, servo, Wi-Fi, and the
// two pre-built HTTP request strings.
void setup() {
  const int ledPins[] = {IO_LED0, IO_LED1};
  const unsigned int ledCount = sizeof(ledPins) / sizeof(ledPins[0]);

  Serial.begin(115200);
  Serial.println("Setup.");

  // Configure the inputs/outputs first, then attach the servo.
  pinMode(IO_SWITCH, INPUT);
  for (unsigned int i = 0; i < ledCount; i++) {
    pinMode(ledPins[i], OUTPUT);
  }
  servo.attach(IO_SERVO);

  // Start with both LEDs off and the servo pin driven low.
  for (unsigned int i = 0; i < ledCount; i++) {
    digitalWrite(ledPins[i], LOW);
  }
  digitalWrite(IO_SERVO, 0);

  // The result of the first connection attempt is not checked here;
  // loop() re-checks the Wi-Fi status before placing a call.
  wifiConnection();

  makeContent();
  makeHebocallContent();
}

// Main cycle (roughly every 10 ms): place a call when the switch is HIGH,
// blink the LEDs, and periodically poll the Web Server for a servo angle.
void loop() {
  const bool switchActive = (digitalRead(IO_SWITCH) == HIGH);

  if (switchActive) {
    // Reconnect only when the link is down.
    bool online = (WiFi.status() == WL_CONNECTED);
    if (!online) {
      online = wifiConnection();
    }

    if (online) {
      httpsPost();
      // Fast-blink for 500 iterations of 10 ms before resuming normal polling.
      int remaining = 500;
      while (remaining > 0) {
        led(10);
        delay(10);
        remaining--;
      }
    }
  }

  led(50);           // slow blink while idle
  checkAction(500);  // polls the Web Server once every 500 calls
  delay(10);
}

// Starts a Wi-Fi connection with the configured ssid/pass and waits for it.
// Checks the status up to 50 times, 500 ms apart, printing a dot per retry.
// Returns true once connected, false if every check failed.
boolean wifiConnection() {
  const int maxAttempts = 50;
  const int retryDelayMs = 500;

  WiFi.begin(ssid, pass);
  Serial.print("Connecting Wi-Fi");

  for (int attempt = 0; attempt < maxAttempts; attempt++) {
    if (WiFi.status() != WL_CONNECTED) {
      delay(retryDelayMs);
      Serial.print(".");
      continue;
    }
    Serial.println();
    Serial.println("Connected. IP ADDRESS: " + WiFi.localIP().toString());
    return true;
  }

  Serial.println("Timed out.");
  return false;
}

// POST request to Twilio
//
// Sends the pre-built request in `content` to the Twilio REST API over TLS,
// reads the whole response and prints its body to the serial console.
// Prints "ERROR" when the connection cannot be established.
void httpsPost() {
  Serial.println("Connecting to Twilio API endpoint...");
  if (!client.connect(twilioHost, 443)) {
    Serial.println("ERROR");
    return;
  }

  client.println(content);
  delay(10);
  Serial.print("Waiting Twilio API response...");
  String response = client.readString();
  // Release the connection as soon as the response has been read; the same
  // client object is reused for the Web Server polling.
  client.stop();
  Serial.println();

  // Print only the body when the header/body separator is present. Without
  // this check a missing separator (indexOf() == -1) would print the response
  // from offset 3.
  const int separator = response.indexOf("\r\n\r\n");
  if (separator >= 0) {
    Serial.println("RESPONSE: " + response.substring(separator + 4));
  } else {
    Serial.println("RESPONSE: " + response);
  }
}

void makeContent() {
String data = "Url=" + (String)twimlUrl + "&To=" + (String)to + "&From=" + (String)from;
String header = "POST " + (String)twilioPath0 + asid + (String)twilioPath1 + " HTTP/1.1\r\n" +
"Host: " + (String)twilioHost + "\r\n" +
"Authorization: " + (String)authorization + "\r\n" +
"User-Agent: ESP8266/1.0\r\n" +
"Connection: close\r\n" +
"Content-Type: application/x-www-form-urlencoded;\r\n" +
"Content-Length: " + data.length();
content = header + "\r\n\r\n" + data;
Serial.println(content);
}

void checkAction(int wait) {
static int count = 0;
String result;
int angle;

count++;
if(count < wait) {
return;
}
count = 0;
result = httpsGet();
angle = result.toInt();
servo.write(angle);
Serial.println("Servo angle = " + String(angle));
}

// GET Action from Web Server
//
// Sends the pre-built request in `hebocallContent` to the Web Server and
// returns the second line of the response body (including its trailing line
// break), which is the span the caller converts to a servo angle.
// The first body line is skipped — presumably the chunk-size line of a
// chunked response; confirm against the server configuration.
//
// Returns "89" when the connection fails or when the response does not
// contain the expected lines. The malformed-response case previously read
// uninitialised offsets.
String httpsGet() {
  const String fallback = "89";

  Serial.println("Connecting to Web Server ...");
  if (!client.connect(hebocallHost, hebocallPort)) {
    Serial.println("ERROR");
    return fallback;
  }

  client.println(hebocallContent);
  delay(10);
  Serial.print("Waiting HeboCall response...");
  String response = client.readString();
  // A new connection is opened for every poll, so release this one now.
  client.stop();
  Serial.println();

  // Locate the end of the headers, then the ends of the first two body lines.
  const int headerEnd = response.indexOf("\r\n\r\n");
  const int firstLineEnd = (headerEnd < 0) ? -1 : response.indexOf("\r\n", headerEnd + 4);
  const int valueStart = (firstLineEnd < 0) ? -1 : firstLineEnd + 2;
  const int valueLineEnd = (valueStart < 0) ? -1 : response.indexOf("\r\n", valueStart);
  if (valueLineEnd < 0) {
    Serial.println("ERROR");
    return fallback;
  }

  const String value = response.substring(valueStart, valueLineEnd + 2);
  Serial.println("HEBOCALLRESPONSE: " + value);
  return value;
}

void makeHebocallContent() {
hebocallContent = "GET " + (String)hebocallPath + " HTTP/1.1\r\n" +
"Host: " + (String)hebocallHost + "\r\n" +
"Authorization: " + (String)hebocallAuthorization + "\r\n" +
"User-Agent: ESP8266/1.0\r\n" +
"Connection: Keep-Alive\r\n\r\n";
Serial.println(hebocallContent);
}

// Alternates the two LEDs once every `wait` calls.
//
// wait: number of calls between two toggles. The call counter is a single
// static shared by every caller, whatever `wait` value each one passes.
void led(int wait) {
  static bool led0Lit = false;  // true while IO_LED0 is the lit one
  static int calls = 0;

  if (++calls < wait) {
    return;
  }
  calls = 0;

  // Light whichever LED is currently dark and switch the other one off.
  digitalWrite(IO_LED0, led0Lit ? LOW : HIGH);
  digitalWrite(IO_LED1, led0Lit ? HIGH : LOW);
  led0Lit = !led0Lit;
}

Twilio Functionsコード

メッセージを音声合成し送信し、受信した音声を認識してその結果(文字列)を{Web Server文字情報保存のURL}にPOSTします。

(この機能はWeb Serverに置いても、TwiML Binsを使っても可能ですが、新機能Functionsを使ってみたかったのです:-)

exports.handler = function(context, event, callback) {
　　　　const message = "攻撃されています。攻撃されています。指示をしてください。"
　　　　const messageNoRes = "指示を受け取れませんでした。"
　　　　let twiml = new Twilio.twiml.VoiceResponse();
　　　　let gatherParams = {};
　　　　gatherParams.input = "speech";
　　　　gatherParams.language = "ja-JP";
　　　　gatherParams.timeout = "3";
　　　　gatherParams.action = ‘{Web Server文字情報保存のURL}’;
　　　　gatherParams.method = "POST";
　　　　let sayParams = {};
　　　　sayParams.language = "ja-JP";
　　　　sayParams.voice = "alice";
　　　　twiml.gather(gatherParams).say(sayParams, message);
　　　　twiml.say(sayParams, messageNoRes)
　　　　callback(null, twiml);
};

Web Server文字情報受け取りPHPコード

パラメータSpeechResultの値として渡されたTwilioにより認識された文字列を、ファイル /mnt/data/speech_text に保存します。

<?php
// Receives Twilio's speech-recognition callback: stores the recognised text
// in /mnt/data/speech_text and answers with TwiML that reads the text back
// followed by an acknowledgement.
require_once dirname(__FILE__) . "/twilio-php-master/Services/Twilio.php";

$message_post = "。了解しました";
// Fall back to an empty string when the parameter is absent, so that no
// undefined-index notice can be emitted into the TwiML response.
$speech_result = isset($_POST["SpeechResult"]) ? $_POST["SpeechResult"] : "";
syslog(LOG_ERR, "speechresult = '" . $speech_result . "'");

# save file
$file_path = "/mnt/data/speech_text";
$result = file_put_contents($file_path, $speech_result . "\n");
// file_put_contents() returns the byte count or false; compare strictly so
// that only a real failure is reported.
if ($result === false) {
    syslog(LOG_ERR, "ERROR: Couldn't save speech result (" . $speech_result . ") to file(" . $file_path . ")");
}

# make TwiML
$twiml = new Services_Twilio_Twiml();
$twiml->say($speech_result . $message_post,
    array(
        'language' => "ja-JP",
        'voice' => "alice"
    ));
print $twiml;

?>

Web Serverサーボモータ制御用情報取得PHPコード

ファイル /mnt/data/speech_text に保存されている文字列に含まれる文字により、0, 89 または 179 の角度情報を返します。

<?php
// Maps the stored speech text to a servo angle and prints it on one line:
//   179 when the text matches the first keyword set,
//   0   when it matches the second keyword set,
//   89  otherwise (also when the text file cannot be read).
$file_path = "/mnt/data/speech_text";

// Read the file only when it is readable, so that no PHP warning can end up
// in the response body that the ESP parses as a number.
$speech_result = is_readable($file_path) ? file_get_contents($file_path) : false;
if ($speech_result === false) {
    print "89\n";
    exit;
}

if (preg_match('/(怒|怖|恐|攻|撃|襲|発|射|始|ang|aggr|att)/i', $speech_result) == 1) {
    print "179\n";
    exit;
}
if (preg_match('/(笑|楽|元|停|止|終|smile|laugh|easy)/i', $speech_result) == 1) {
    print "0\n";
    exit;
}

print "89\n";
?>

果たして、このモジュールをどこかのヘボコンで披露する機会は来るのでしょうか。