7 | {registerStatus !== null && 0 <= registerStatus && (
8 |
9 |
10 | Creating commitment...
11 | {registerStatus >= 2 && (
12 |
13 | Success
14 |
15 | )}
16 | {registerStatus === 1 && (
17 |
18 | Error
19 |
20 | )}
21 |
22 |
23 | )}
24 | {3 <= registerStatus && (
25 |
26 |
27 | Sending commitment...
28 | {registerStatus === 5 && (
29 |
30 | Success
31 |
32 | )}
33 | {registerStatus === 4 && (
34 |
35 | Error
36 |
37 | )}
38 |
39 |
40 | )}
41 |
42 | );
43 | }
44 |
--------------------------------------------------------------------------------
/backend/machine_learning/speaker_recognition.py:
--------------------------------------------------------------------------------
# Defines the function that computes a feature vector from an audio input
2 | from .RawNet3.models import RawNet3
3 |
4 | from .RawNet3.infererence import extract_speaker_embd
5 | import torch
6 | import numpy as np
7 | import soundfile
8 |
def calc_feat_vec(audio, sample_rate):
    """
    Compute a binary speaker-embedding feature vector for an audio input.

    Parameters
    ----------
    audio : numpy.ndarray
        Waveform samples; ``extract_speaker_embd`` is called with
        ``n_samples=48000`` and ``n_segments=10`` (the original comment
        mentions shape (10, 48000) — TODO confirm against the caller).
    sample_rate : int
        Sampling rate of ``audio`` in Hz.

    Returns
    -------
    numpy.ndarray
        0/1 integer vector of length 256 (``nOut=256``): the sign pattern
        of the mean RawNet3 embedding across segments.
    """
    # Path to the pretrained RawNet3 weights.
    # NOTE(review): relative to the process CWD — presumably the backend is
    # started from /backend; verify, otherwise loading will fail.
    path_pt = "machine_learning/RawNet3/models/weights/model.pt"

    n_segments = 10
    gpu = False

    # Build the model once and cache it on the function object so repeated
    # calls do not reconstruct the network and re-read the weight file.
    torch_model = getattr(calc_feat_vec, "_model", None)
    if torch_model is None:
        torch_model = RawNet3.MainModel(
            encoder_type="ECA",
            nOut=256,
            out_bn=False,
            sinc_stride=10,
            log_sinc=True,
            norm_sinc="mean",
            grad_mult=1)
        # map_location keeps CPU-only hosts working with GPU-saved weights.
        torch_model.load_state_dict(
            torch.load(path_pt, map_location=lambda storage, loc: storage)["model"])
        torch_model.eval()
        calc_feat_vec._model = torch_model

    # Embed each segment and average into a single 256-dim vector.
    feat_vec = extract_speaker_embd(
        torch_model,
        audio,
        sample_rate,
        n_samples=48000,
        n_segments=n_segments,
        gpu=gpu,
    ).mean(0)

    # Binarize by sign: strictly positive components -> 1, otherwise 0.
    binary_vec = np.where(feat_vec > 0, 1, 0)

    return binary_vec
--------------------------------------------------------------------------------
/app/src/RecordButton.jsx:
--------------------------------------------------------------------------------
1 | import React, { useRef } from "react";
2 | import MicIcon from "@mui/icons-material/Mic";
3 | import IconButton from "@mui/material/IconButton";
4 | import MediaStreamRecorder from "msr";
5 |
// Recording length in milliseconds (one MediaStreamRecorder timeslice).
const record_duration = 5000;
7 |
8 | // eslint-disable-next-line react/prop-types
9 | function RecordButton({ sendRecording, disabled, setDisabled, ...props }) {
10 | const mediaRecorder = useRef(null);
11 |
12 | const startRecording = () => {
13 | navigator.mediaDevices
14 | .getUserMedia({ audio: true })
15 | .then((stream) => {
16 | mediaRecorder.current = new MediaStreamRecorder(stream);
17 | mediaRecorder.current.mimeType = "audio/wav"
18 | mediaRecorder.current.audioChannels = 1;
19 | mediaRecorder.current.sampleRate = 16000;
20 | mediaRecorder.current.start(record_duration); // 5秒ごとにデータを取得する
21 | setDisabled(true);
22 |
23 | mediaRecorder.current.ondataavailable = (blob) => {
24 | // 5秒経過したら録音を停止する
25 | stopRecording();
26 | sendRecording(blob);
27 | };
28 | })
29 | .catch((err) => {
30 | console.log("録音が開始できませんでした: ", err);
31 | });
32 | };
33 |
34 | const stopRecording = () => {
35 | if (mediaRecorder.current) {
36 | mediaRecorder.current.stop();
37 | }
38 | };
39 |
40 | return (
41 |