├── app-engine-front-end
├── img
│ ├── favicon.ico
│ ├── icon_3_pdf_x32.png
│ ├── file-preview-pdf.png
│ ├── icon_1_word_x32.png
│ └── sample-outline-map.jpg
├── fonts
│ ├── fontawesome-webfont.eot
│ ├── fontawesome-webfont.ttf
│ ├── fontawesome-webfont.woff
│ └── fontawesome-webfont.woff2
├── README.md
├── ep-upcomingMeetings.py
├── ep-jumbotronContent.py
├── ep-segmentMarkers.py
├── app.yaml
├── utilities.py
├── ep-relatedFiles.py
├── ep-searchVideo.py
├── ep-meetingDetails.py
├── ep-meetingArchive.py
└── ep-searchArchive.py
├── generate-wordcloud
├── fonts
│ └── LilitaOne-Regular.ttf
├── procfile
├── requirements.txt
├── monitor.py
├── Dockerfile
├── README.md
├── worker.py
└── stopwords-20180109-133115.json
├── publish-pdf-transcript
├── fonts
│ ├── Roboto-Black.ttf
│ ├── Roboto-Bold.ttf
│ ├── Roboto-Italic.ttf
│ ├── Roboto-Light.ttf
│ ├── Roboto-Medium.ttf
│ ├── Roboto-Thin.ttf
│ ├── Roboto-Regular.ttf
│ ├── Roboto-BlackItalic.ttf
│ ├── Roboto-BoldItalic.ttf
│ ├── Roboto-LightItalic.ttf
│ ├── Roboto-ThinItalic.ttf
│ └── Roboto-MediumItalic.ttf
├── procfile
├── requirements.txt
├── monitor.py
├── README.md
├── Dockerfile
└── worker.py
├── create-word-list
├── procfile
├── requirements.txt
├── monitor.py
├── Dockerfile
├── README.md
└── worker.py
├── index-meeting
├── procfile
├── requirements.txt
├── monitor.py
├── README.md
├── Dockerfile
└── worker.py
├── in-video-search
├── requirements.txt
├── app.yaml
├── main_test.py
├── README.md
└── main.py
├── transcode-video-to-audio
├── procfile
├── requirements.txt
├── monitor.py
├── README.md
├── Dockerfile
└── worker.py
├── archive-video-search
├── requirements.txt
├── main_test.py
├── app.yaml
├── README.md
└── main.py
├── app-engine-utility-service
├── requirements.txt
├── main.py
├── dispatch.yaml
├── cron.yaml
├── msgPublish.py
├── toggleIndex.py
├── toggleTranscode.py
├── toggleTranscriptErr.py
├── README.md
├── app.yaml
├── idWordcloud.py
├── idTranscript.py
├── idTranscode.py
├── meetingDetails.py
├── runRecognize.py
├── speechJobs.py
├── utilities.py
└── receiveResults.py
├── CONTRIBUTING.md
├── README.md
└── LICENSE.md
/app-engine-front-end/img/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/app-engine-front-end/img/favicon.ico
--------------------------------------------------------------------------------
/app-engine-front-end/img/icon_3_pdf_x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/app-engine-front-end/img/icon_3_pdf_x32.png
--------------------------------------------------------------------------------
/app-engine-front-end/img/file-preview-pdf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/app-engine-front-end/img/file-preview-pdf.png
--------------------------------------------------------------------------------
/app-engine-front-end/img/icon_1_word_x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/app-engine-front-end/img/icon_1_word_x32.png
--------------------------------------------------------------------------------
/generate-wordcloud/fonts/LilitaOne-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/generate-wordcloud/fonts/LilitaOne-Regular.ttf
--------------------------------------------------------------------------------
/publish-pdf-transcript/fonts/Roboto-Black.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/publish-pdf-transcript/fonts/Roboto-Black.ttf
--------------------------------------------------------------------------------
/publish-pdf-transcript/fonts/Roboto-Bold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/publish-pdf-transcript/fonts/Roboto-Bold.ttf
--------------------------------------------------------------------------------
/publish-pdf-transcript/fonts/Roboto-Italic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/publish-pdf-transcript/fonts/Roboto-Italic.ttf
--------------------------------------------------------------------------------
/publish-pdf-transcript/fonts/Roboto-Light.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/publish-pdf-transcript/fonts/Roboto-Light.ttf
--------------------------------------------------------------------------------
/publish-pdf-transcript/fonts/Roboto-Medium.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/publish-pdf-transcript/fonts/Roboto-Medium.ttf
--------------------------------------------------------------------------------
/publish-pdf-transcript/fonts/Roboto-Thin.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/publish-pdf-transcript/fonts/Roboto-Thin.ttf
--------------------------------------------------------------------------------
/app-engine-front-end/img/sample-outline-map.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/app-engine-front-end/img/sample-outline-map.jpg
--------------------------------------------------------------------------------
/publish-pdf-transcript/fonts/Roboto-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/publish-pdf-transcript/fonts/Roboto-Regular.ttf
--------------------------------------------------------------------------------
/app-engine-front-end/fonts/fontawesome-webfont.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/app-engine-front-end/fonts/fontawesome-webfont.eot
--------------------------------------------------------------------------------
/app-engine-front-end/fonts/fontawesome-webfont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/app-engine-front-end/fonts/fontawesome-webfont.ttf
--------------------------------------------------------------------------------
/app-engine-front-end/fonts/fontawesome-webfont.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/app-engine-front-end/fonts/fontawesome-webfont.woff
--------------------------------------------------------------------------------
/publish-pdf-transcript/fonts/Roboto-BlackItalic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/publish-pdf-transcript/fonts/Roboto-BlackItalic.ttf
--------------------------------------------------------------------------------
/publish-pdf-transcript/fonts/Roboto-BoldItalic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/publish-pdf-transcript/fonts/Roboto-BoldItalic.ttf
--------------------------------------------------------------------------------
/publish-pdf-transcript/fonts/Roboto-LightItalic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/publish-pdf-transcript/fonts/Roboto-LightItalic.ttf
--------------------------------------------------------------------------------
/publish-pdf-transcript/fonts/Roboto-ThinItalic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/publish-pdf-transcript/fonts/Roboto-ThinItalic.ttf
--------------------------------------------------------------------------------
/app-engine-front-end/fonts/fontawesome-webfont.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/app-engine-front-end/fonts/fontawesome-webfont.woff2
--------------------------------------------------------------------------------
/publish-pdf-transcript/fonts/Roboto-MediumItalic.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/google/gov-meetings-made-searchable/HEAD/publish-pdf-transcript/fonts/Roboto-MediumItalic.ttf
--------------------------------------------------------------------------------
/create-word-list/procfile:
--------------------------------------------------------------------------------
1 | # This is not an officially supported Google product, though support
2 | # will be provided on a best-effort basis.
3 |
4 | # Copyright 2018 Google LLC
5 |
6 | # Licensed under the Apache License, Version 2.0 (the "License"); you
7 | # may not use this file except in compliance with the License.
8 |
9 | # You may obtain a copy of the License at:
10 |
11 | # https://www.apache.org/licenses/LICENSE-2.0
12 |
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 |
20 | worker: python /app/worker.py
21 | monitor: python monitor.py
22 |
--------------------------------------------------------------------------------
/index-meeting/procfile:
--------------------------------------------------------------------------------
1 | # This is not an officially supported Google product, though support
2 | # will be provided on a best-effort basis.
3 |
4 | # Copyright 2018 Google LLC
5 |
6 | # Licensed under the Apache License, Version 2.0 (the "License"); you
7 | # may not use this file except in compliance with the License.
8 |
9 | # You may obtain a copy of the License at:
10 |
11 | # https://www.apache.org/licenses/LICENSE-2.0
12 |
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 |
20 | worker: python /app/worker.py
21 | monitor: python monitor.py
22 |
--------------------------------------------------------------------------------
/generate-wordcloud/procfile:
--------------------------------------------------------------------------------
1 | # This is not an officially supported Google product, though support
2 | # will be provided on a best-effort basis.
3 |
4 | # Copyright 2018 Google LLC
5 |
6 | # Licensed under the Apache License, Version 2.0 (the "License"); you
7 | # may not use this file except in compliance with the License.
8 |
9 | # You may obtain a copy of the License at:
10 |
11 | # https://www.apache.org/licenses/LICENSE-2.0
12 |
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 |
20 | worker: python /app/worker.py
21 | monitor: python monitor.py
22 |
--------------------------------------------------------------------------------
/in-video-search/requirements.txt:
--------------------------------------------------------------------------------
1 | # This is not an officially supported Google product, though support
2 | # will be provided on a best-effort basis.
3 |
4 | # Copyright 2018 Google LLC
5 |
6 | # Licensed under the Apache License, Version 2.0 (the "License"); you
7 | # may not use this file except in compliance with the License.
8 |
9 | # You may obtain a copy of the License at:
10 |
11 | # https://www.apache.org/licenses/LICENSE-2.0
12 |
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 |
20 | requests
21 | Flask==1.0.2
22 | elasticsearch
23 | gunicorn==19.9.0
--------------------------------------------------------------------------------
/publish-pdf-transcript/procfile:
--------------------------------------------------------------------------------
1 | # This is not an officially supported Google product, though support
2 | # will be provided on a best-effort basis.
3 |
4 | # Copyright 2018 Google LLC
5 |
6 | # Licensed under the Apache License, Version 2.0 (the "License"); you
7 | # may not use this file except in compliance with the License.
8 |
9 | # You may obtain a copy of the License at:
10 |
11 | # https://www.apache.org/licenses/LICENSE-2.0
12 |
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 |
20 | worker: python /app/worker.py
21 | monitor: python monitor.py
22 |
--------------------------------------------------------------------------------
/transcode-video-to-audio/procfile:
--------------------------------------------------------------------------------
1 | # This is not an officially supported Google product, though support
2 | # will be provided on a best-effort basis.
3 |
4 | # Copyright 2018 Google LLC
5 |
6 | # Licensed under the Apache License, Version 2.0 (the "License"); you
7 | # may not use this file except in compliance with the License.
8 |
9 | # You may obtain a copy of the License at:
10 |
11 | # https://www.apache.org/licenses/LICENSE-2.0
12 |
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 | worker: python /app/worker.py
20 | monitor: python monitor.py
21 |
--------------------------------------------------------------------------------
/archive-video-search/requirements.txt:
--------------------------------------------------------------------------------
1 | # This is not an officially supported Google product, though support
2 | # will be provided on a best-effort basis.
3 |
4 | # Copyright 2018 Google LLC
5 |
6 | # Licensed under the Apache License, Version 2.0 (the "License"); you
7 | # may not use this file except in compliance with the License.
8 |
9 | # You may obtain a copy of the License at:
10 |
11 | # https://www.apache.org/licenses/LICENSE-2.0
12 |
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 |
20 | requests
21 | Flask==1.0.2
22 | elasticsearch
23 | gunicorn==19.9.0
--------------------------------------------------------------------------------
/app-engine-utility-service/requirements.txt:
--------------------------------------------------------------------------------
1 | # This is not an officially supported Google product, though support
2 | # will be provided on a best-effort basis.
3 |
4 | # Copyright 2018 Google LLC
5 |
6 | # Licensed under the Apache License, Version 2.0 (the "License"); you
7 | # may not use this file except in compliance with the License.
8 |
9 | # You may obtain a copy of the License at:
10 |
11 | # https://www.apache.org/licenses/LICENSE-2.0
12 |
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 |
20 | google-api-python-client
21 | google-cloud-pubsub
22 | BeautifulSoup
23 | feedparser
24 | gcloud
25 | httplib2
--------------------------------------------------------------------------------
/app-engine-utility-service/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This is not an officially supported Google product, though support
4 | # will be provided on a best-effort basis.
5 |
6 | # Copyright 2018 Google LLC
7 |
8 | # Licensed under the Apache License, Version 2.0 (the "License"); you
9 | # may not use this file except in compliance with the License.
10 |
11 | # You may obtain a copy of the License at:
12 |
13 | # https://www.apache.org/licenses/LICENSE-2.0
14 |
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 |
21 |
def main():
    """Write a plain-text placeholder response to stdout.

    Prints a ``Content-type`` header line (the literal ends in two newlines
    and ``print`` appends a third), followed by a short body line.
    """
    # A parenthesised single-argument print yields identical output under the
    # Python 2 print statement and the Python 3 print function, so the
    # script no longer fails to parse on Python 3.
    print("Content-type: text/plain; charset=UTF-8\n\n")
    print("Nothing To See Here")


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/transcode-video-to-audio/requirements.txt:
--------------------------------------------------------------------------------
1 | # This is not an officially supported Google product, though support
2 | # will be provided on a best-effort basis.
3 |
4 | # Copyright 2018 Google LLC
5 |
6 | # Licensed under the Apache License, Version 2.0 (the "License"); you
7 | # may not use this file except in compliance with the License.
8 |
9 | # You may obtain a copy of the License at:
10 |
11 | # https://www.apache.org/licenses/LICENSE-2.0
12 |
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 |
20 | Flask==0.10.1
21 | requests==2.18.4
22 | google-cloud-storage==1.6.0
23 | urllib3==1.22
24 | gunicorn==19.6.0
25 | PyMySQL==0.7.3
26 | six==1.10.0
27 | honcho==0.7.1
28 | oauth2client==4.1.2
--------------------------------------------------------------------------------
/create-word-list/requirements.txt:
--------------------------------------------------------------------------------
1 | # This is not an officially supported Google product, though support
2 | # will be provided on a best-effort basis.
3 |
4 | # Copyright 2018 Google LLC
5 |
6 | # Licensed under the Apache License, Version 2.0 (the "License"); you
7 | # may not use this file except in compliance with the License.
8 |
9 | # You may obtain a copy of the License at:
10 |
11 | # https://www.apache.org/licenses/LICENSE-2.0
12 |
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 |
20 | Flask==0.10.1
21 | requests==2.18.4
22 | google-cloud-pubsub==0.30.1
23 | google-cloud-storage==1.6.0
24 | urllib3==1.22
25 | gunicorn==19.6.0
26 | six==1.10.0
27 | honcho==0.7.1
28 | oauth2client==4.1.2
29 | httplib2==0.10.3
--------------------------------------------------------------------------------
/app-engine-utility-service/dispatch.yaml:
--------------------------------------------------------------------------------
1 | # This is not an officially supported Google product, though support
2 | # will be provided on a best-effort basis.
3 |
4 | # Copyright 2018 Google LLC
5 |
6 | # Licensed under the Apache License, Version 2.0 (the "License"); you
7 | # may not use this file except in compliance with the License.
8 |
9 | # You may obtain a copy of the License at:
10 |
11 | # https://www.apache.org/licenses/LICENSE-2.0
12 |
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 |
20 | dispatch:
21 | - url: "__website_URL__/*"
22 | service: engaged-citizens
23 | - url: "*/__website_URL__/*"
24 | service: engaged-citizens
25 | - url: "www.__website_URL__/*"
26 | service: engaged-citizens
--------------------------------------------------------------------------------
/publish-pdf-transcript/requirements.txt:
--------------------------------------------------------------------------------
1 | # This is not an officially supported Google product, though support
2 | # will be provided on a best-effort basis.
3 |
4 | # Copyright 2018 Google LLC
5 |
6 | # Licensed under the Apache License, Version 2.0 (the "License"); you
7 | # may not use this file except in compliance with the License.
8 |
9 | # You may obtain a copy of the License at:
10 |
11 | # https://www.apache.org/licenses/LICENSE-2.0
12 |
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 |
20 | Flask==0.10.1
21 | requests==2.18.4
22 | google-cloud-pubsub==0.30.1
23 | google-cloud-storage==1.6.0
24 | urllib3==1.22
25 | gunicorn==19.6.0
26 | six==1.10.0
27 | honcho==0.7.1
28 | oauth2client==4.1.2
29 | pdfkit==0.6.1
30 |
--------------------------------------------------------------------------------
/index-meeting/requirements.txt:
--------------------------------------------------------------------------------
1 | # This is not an officially supported Google product, though support
2 | # will be provided on a best-effort basis.
3 |
4 | # Copyright 2018 Google LLC
5 |
6 | # Licensed under the Apache License, Version 2.0 (the "License"); you
7 | # may not use this file except in compliance with the License.
8 |
9 | # You may obtain a copy of the License at:
10 |
11 | # https://www.apache.org/licenses/LICENSE-2.0
12 |
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 |
20 | Flask==0.10.1
21 | requests==2.18.4
22 | google-cloud-pubsub==0.30.1
23 | google-cloud-storage==1.6.0
24 | urllib3==1.22
25 | gunicorn==19.6.0
26 | PyMySQL==0.7.3
27 | six==1.10.0
28 | honcho==0.7.1
29 | oauth2client==4.1.2
30 | elasticsearch==6.3.1
--------------------------------------------------------------------------------
/in-video-search/app.yaml:
--------------------------------------------------------------------------------
1 | # This is not an officially supported Google product, though support
2 | # will be provided on a best-effort basis.
3 |
4 | # Copyright 2018 Google LLC
5 |
6 | # Licensed under the Apache License, Version 2.0 (the "License"); you
7 | # may not use this file except in compliance with the License.
8 |
9 | # You may obtain a copy of the License at:
10 |
11 | # https://www.apache.org/licenses/LICENSE-2.0
12 |
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 |
20 | runtime: python
21 | env: flex
22 | service: in-video-search
23 | entrypoint: gunicorn -b :$PORT main:app
24 |
25 | runtime_config:
26 | python_version: 3
27 |
28 | manual_scaling:
29 | instances: 1
30 |
31 | resources:
32 | cpu: 1
33 | memory_gb: 0.5
34 | disk_size_gb: 10
35 |
--------------------------------------------------------------------------------
/in-video-search/main_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This is not an officially supported Google product, though support
4 | # will be provided on a best-effort basis.
5 |
6 | # Copyright 2018 Google LLC
7 |
8 | # Licensed under the Apache License, Version 2.0 (the "License"); you
9 | # may not use this file except in compliance with the License.
10 |
11 | # You may obtain a copy of the License at:
12 |
13 | # https://www.apache.org/licenses/LICENSE-2.0
14 |
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 |
21 |
22 | import main
23 |
24 |
def test_index():
    """Request the root route and check for HTTP 200 and the greeting text."""
    app = main.app
    # Enable Flask's testing flag before creating the test client.
    app.testing = True
    response = app.test_client().get("/")

    assert response.status_code == 200
    body_text = response.data.decode("utf-8")
    assert "Hello World" in body_text
32 |
--------------------------------------------------------------------------------
/archive-video-search/main_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This is not an officially supported Google product, though support
4 | # will be provided on a best-effort basis.
5 |
6 | # Copyright 2018 Google LLC
7 |
8 | # Licensed under the Apache License, Version 2.0 (the "License"); you
9 | # may not use this file except in compliance with the License.
10 |
11 | # You may obtain a copy of the License at:
12 |
13 | # https://www.apache.org/licenses/LICENSE-2.0
14 |
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 |
21 |
22 | import main
23 |
24 |
def test_index():
    """Request the root route and check for HTTP 200 and the greeting text."""
    app = main.app
    # Enable Flask's testing flag before creating the test client.
    app.testing = True
    response = app.test_client().get("/")

    assert response.status_code == 200
    body_text = response.data.decode("utf-8")
    assert "Hello World" in body_text
32 |
--------------------------------------------------------------------------------
/archive-video-search/app.yaml:
--------------------------------------------------------------------------------
1 | # This is not an officially supported Google product, though support
2 | # will be provided on a best-effort basis.
3 |
4 | # Copyright 2018 Google LLC
5 |
6 | # Licensed under the Apache License, Version 2.0 (the "License"); you
7 | # may not use this file except in compliance with the License.
8 |
9 | # You may obtain a copy of the License at:
10 |
11 | # https://www.apache.org/licenses/LICENSE-2.0
12 |
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 |
20 | runtime: python
21 | env: flex
22 | service: archive-video-search
23 | entrypoint: gunicorn -b :$PORT main:app
24 |
25 | runtime_config:
26 | python_version: 3
27 |
28 | manual_scaling:
29 | instances: 1
30 |
31 | resources:
32 | cpu: 1
33 | memory_gb: 0.5
34 | disk_size_gb: 10
35 |
--------------------------------------------------------------------------------
/generate-wordcloud/requirements.txt:
--------------------------------------------------------------------------------
1 | # This is not an officially supported Google product, though support
2 | # will be provided on a best-effort basis.
3 |
4 | # Copyright 2018 Google LLC
5 |
6 | # Licensed under the Apache License, Version 2.0 (the "License"); you
7 | # may not use this file except in compliance with the License.
8 |
9 | # You may obtain a copy of the License at:
10 |
11 | # https://www.apache.org/licenses/LICENSE-2.0
12 |
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 |
20 | Flask==0.10.1
21 | requests==2.18.4
22 | google-cloud-pubsub==0.30.1
23 | google-cloud-storage==1.6.0
24 | urllib3==1.22
25 | gunicorn==19.6.0
26 | six==1.10.0
27 | honcho==0.7.1
28 | oauth2client==4.1.2
29 | scipy==1.0.0
30 | nltk==3.2.4
31 | Pillow==4.3.0
32 | wordcloud==1.3.1
33 | matplotlib==2.1.0
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to
{}
93 | See logs for full stacktrace.
94 | """.format(e), 500
95 |
96 |
97 | if __name__ == "__main__":
98 | app.run(host = "127.0.0.1", port = 8080, debug = True)
99 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Government Meetings Made Searchable
2 |
3 | This is not an officially supported Google product, though support will be provided on a best-effort basis.
4 |
5 | Copyright 2018 Google LLC
6 |
7 | Licensed under the Apache License, Version 2.0 (the "License");
8 | you may not use this file except in compliance with the License.
9 | You may obtain a copy of the License at
10 |
11 | https://www.apache.org/licenses/LICENSE-2.0
12 |
13 | Unless required by applicable law or agreed to in writing, software
14 | distributed under the License is distributed on an "AS IS" BASIS,
15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | See the License for the specific language governing permissions and
17 | limitations under the License.
18 |
19 |
20 | ### Introduction
21 |
22 | This is a project to make the contents of public meetings searchable and discoverable. This
23 | repo is a series of utilities and containers you can use to transcode, transcribe, and
24 | publish content from videos of public meetings and hearings.
25 |
26 |
27 | #### app-engine-front-end
28 | This is a Google App Engine app that provides a front end to the project.
29 |
30 |
31 | #### app-engine-utility-service
32 | This is a Google App Engine app that facilitates interaction with the Google Cloud SQL
33 | database, and provides common utilities for back end services.
34 |
35 |
36 | #### in-video-search
37 | This is a Google App Engine Flex app that provides a wrapper and proxy for requests
38 | to an Elasticsearch instance. This service handles searches for materials in a
39 | particular meeting.
40 |
41 |
42 | #### archive-video-search
43 | This is a Google App Engine Flex app that provides a wrapper and proxy for requests
44 | to and responses from an Elasticsearch instance. This service handles searches for
45 | materials across all meetings in an index.
46 |
47 |
48 | #### transcode-video-to-audio
49 | This is a container that transcodes a video file to an audio file that is compatible with
50 | the Google Speech API.
51 |
52 |
53 | #### create-word-list
54 | This is a container that creates a list of words from the Google Speech API responses that
55 | will be used for creating a word cloud.
56 |
57 |
58 | #### generate-wordcloud
59 | This is a container that creates a word cloud image in PNG format from a list of words
60 | stored on Google Cloud Storage.
61 |
62 |
63 | #### index-meeting
64 | This is a container that parses Google Speech API responses and writes the contents to an
65 | Elasticsearch index in a batch process.
66 |
67 | #### publish-pdf-transcript
68 |
69 | This is a container that parses Google Speech API responses and produces a human readable
70 | PDF transcript.
71 |
--------------------------------------------------------------------------------
/archive-video-search/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This is not an officially supported Google product, though support
4 | # will be provided on a best-effort basis.
5 |
6 | # Copyright 2018 Google LLC
7 |
8 | # Licensed under the Apache License, Version 2.0 (the "License"); you
9 | # may not use this file except in compliance with the License.
10 |
11 | # You may obtain a copy of the License at:
12 |
13 | # https://www.apache.org/licenses/LICENSE-2.0
14 |
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 |
21 |
22 | import json
23 | import logging
24 | from flask import Flask
25 | from flask import request
26 | from flask import Response
27 | from elasticsearch import Elasticsearch
28 |
29 | app = Flask(__name__)
30 |
31 |
@app.route("/")
def main():
    """Search one organization's transcript index, grouped by meeting.

    Reads two query-string parameters: ``q`` (the text to search for) and
    ``orgId`` (passed to Elasticsearch as the index name). The query matches
    ``transcriptStr`` with the ``and`` operator and buckets the matches by
    ``globalId``, attaching one top hit per bucket that carries the
    meeting's date, description and URL identifier.

    Returns:
        A ``Response`` with mimetype ``application/json`` carrying the
        serialized search result, or ``{"None": "None"}`` when the search
        or its serialization fails.
    """
    q = request.args.get("q")
    orgId = request.args.get("orgId")

    searchClient = Elasticsearch(
        ["__Elastic_Search_Instance_URL__"],
        http_auth=(
            "__Elastic_Search_Instance_Username__",
            "__Elastic_Search_Instance_Password__"
        )
    )
    queryBody = {
        "query": {
            "bool": {
                "should": {
                    "match": {
                        "transcriptStr": {
                            "query": q,
                            "operator": "and"
                        }
                    }
                }
            }
        },
        "sort": [
            {"_score": {"order": "desc"}},
        ],
        # No individual hits are requested at the top level; the per-meeting
        # buckets below carry everything the caller gets back.
        "size": 0,
        "aggs": {
            "group_by_meeting": {
                "terms": {
                    "field": "globalId",
                    "size": 80,
                    "min_doc_count": 1
                },
                "aggs": {
                    "meeting_details": {
                        "top_hits": {
                            "size": 1,
                            "_source": {
                                "includes": ["meetingDate", "meetingDesc", "urlIdentifier"]
                            }
                        }
                    }
                }
            }
        }
    }

    try:
        searchObj = searchClient.search(
            index=orgId,
            body=queryBody
        )
        outputStr = json.dumps(searchObj)
    except Exception:
        # Callers still receive the historical placeholder payload, but the
        # failure is now recorded instead of being discarded silently.
        logging.exception("Archive search failed for index %r", orgId)
        outputStr = json.dumps({"None": "None"})

    return Response(outputStr, mimetype="application/json")
92 |
93 |
@app.errorhandler(500)
def server_error(e):
    """Log the failing request and answer with a plain-text 500 message."""
    logging.exception("An error occurred during a request.")
    message = (
        "\n"
        "An internal error occurred: {}\n"
        "See logs for full stacktrace.\n"
    ).format(e)
    return message, 500
101 |
102 |
103 | if __name__ == "__main__":
104 | app.run(host="127.0.0.1", port=8080, debug=True)
105 |
--------------------------------------------------------------------------------
/app-engine-front-end/ep-meetingDetails.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This is not an officially supported Google product, though support
4 | # will be provided on a best-effort basis.
5 |
6 | # Copyright 2018 Google LLC
7 |
8 | # Licensed under the Apache License, Version 2.0 (the "License"); you
9 | # may not use this file except in compliance with the License.
10 |
11 | # You may obtain a copy of the License at:
12 |
13 | # https://www.apache.org/licenses/LICENSE-2.0
14 |
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 |
21 |
22 | import os
23 | import cgi
24 | import logging
25 | import utilities
26 | import ujson as json
27 | from datetime import datetime
28 |
29 |
def lookupMeeting(urlIdentifier):
    """Fetch the meetingRegistry row for ``urlIdentifier``.

    Only rows whose ``youtubeId`` is set are considered, and at most one row
    is requested.

    Returns:
        The first result row. Columns, in order: meetingDesc, meetingDate,
        youtubeId, wordCloud, publishedVideo, publishedTranscript,
        orgIdentifier, publishedAgenda, urlIdentifier, hasSegments.

    Raises:
        IndexError: presumably when nothing matches (the first element of an
            empty row set is indexed) - confirm against
            ``utilities.dbExecution``.
    """
    sqlCmd = (
        "select\n"
        "meetingDesc,\n"
        "meetingDate,\n"
        "youtubeId,\n"
        "wordCloud,\n"
        "publishedVideo,\n"
        "publishedTranscript,\n"
        "orgIdentifier,\n"
        "publishedAgenda,\n"
        "urlIdentifier,\n"
        "hasSegments from meetingRegistry\n"
        "where urlIdentifier = %s\n"
        "and youtubeId is not NULL\n"
        "limit 1"
    )
    # A one-element tuple: "(urlIdentifier)" without the trailing comma is
    # just the bare string, not a parameter sequence.
    sqlData = (urlIdentifier,)
    resultList = utilities.dbExecution(sqlCmd, sqlData)

    return resultList[2][0]
50 |
51 |
def formatDate(meetingDate):
    """Convert a ``YYYYMMDD`` string into display text.

    Returns:
        A ``(formattedDate, weekDay)`` tuple such as
        ``("January 09, 2018", "Tuesday")``.
    """
    parsed = datetime.strptime(meetingDate, "%Y%m%d")
    return parsed.strftime("%B %d, %Y"), parsed.strftime("%A")
58 |
59 |
def main():
    """CGI entry point: print one meeting's details as JSON.

    Reads the ``urlIdentifier`` form field, looks the meeting up and prints
    a JSON object describing it. Any failure along the way (missing field,
    no matching meeting, unparsable date) produces ``{"error": "Error"}``
    instead.
    """
    passedArgs = cgi.FieldStorage()

    try:
        urlIdentifier = passedArgs["urlIdentifier"].value
        resultObj = lookupMeeting(urlIdentifier)

        hasSegments = resultObj[9]
        if hasSegments is None:
            hasSegments = 0

        formattedDate, weekDay = formatDate(resultObj[1])
        outputObj = {
            "desc": resultObj[0],
            "dow": weekDay,
            "date": formattedDate,
            "youtubeId": resultObj[2],
            "urlIdentifier": urlIdentifier,
            "wordCloud": resultObj[3],
            "videoUrl": resultObj[4],
            "transcriptUrl": resultObj[5],
            "agendaUrl": resultObj[7],
            "hasSegments": hasSegments,
        }
        logging.info("loadMeeting")
    except Exception:
        # The client only ever sees a generic error object, so keep the
        # real cause in the logs.
        logging.exception("Could not load meeting details")
        outputObj = {"error": "Error"}

    print("Content-Type: application/json\n")
    print(json.dumps(outputObj))
100 |
101 |
102 | if __name__ == "__main__":
103 | main()
--------------------------------------------------------------------------------
/app-engine-front-end/ep-meetingArchive.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This is not an officially supported Google product, though support
4 | # will be provided on a best-effort basis.
5 |
6 | # Copyright 2018 Google LLC
7 |
8 | # Licensed under the Apache License, Version 2.0 (the "License"); you
9 | # may not use this file except in compliance with the License.
10 |
11 | # You may obtain a copy of the License at:
12 |
13 | # https://www.apache.org/licenses/LICENSE-2.0
14 |
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 |
21 |
22 | import os
23 | import cgi
24 | import utilities
25 | import ujson as json
26 | from datetime import datetime
27 |
28 |
def getMeetings(qryLimit, qryOffset, orgIdentifier):
    """Fetch one page of an organization's meetings, newest first.

    Only meetings whose ``youtubeId`` is set are selected. ``qryLimit`` and
    ``qryOffset`` are converted with ``int()`` before being bound.

    Returns:
        A dict keyed by ``<meetingDate><NNN>``, where ``NNN`` is a
        zero-padded counter that restarts at 000 for each new date, so
        several meetings on the same day get distinct keys. Each value is a
        dict with the keys ``desc``, ``date``, ``dow``, ``meetingId`` and
        ``urlIdentifier``.
    """
    sqlCmd = (
        "select\n"
        "meetingDesc,\n"
        "meetingDate,\n"
        "urlIdentifier,\n"
        "globalId from meetingRegistry\n"
        "where orgIdentifier = %s\n"
        "and youtubeId is not NULL\n"
        "order by meetingDate DESC\n"
        "limit %s\n"
        "offset %s"
    )
    sqlData = (orgIdentifier, int(qryLimit), int(qryOffset))
    resultList = utilities.dbExecution(sqlCmd, sqlData)

    lastDate = None
    dateCnt = 0

    meetingDict = {}
    for eachEntry in resultList[2]:
        meetingDate = eachEntry[1]

        # Rows arrive ordered by date, so equal dates are adjacent.
        if lastDate == meetingDate:
            dateCnt += 1
        else:
            dateCnt = 0
        # NOTE(review): assumes the counter suffix is applied to every row,
        # not only to the first row of a date - confirm against the
        # deployed file.
        dateIncr = str(meetingDate) + str(dateCnt).zfill(3)

        formattedDate, weekDay = formatDate(meetingDate)

        meetingDict[dateIncr] = {
            "desc": eachEntry[0],
            "date": formattedDate,
            "dow": weekDay,
            "meetingId": eachEntry[3],
            "urlIdentifier": eachEntry[2],
        }
        lastDate = meetingDate

    return meetingDict
70 |
71 |
def formatDate(meetingDate):
    """Render a ``YYYYMMDD`` string as a long date plus its weekday name.

    Returns:
        A two-item tuple: the date formatted as ``"%B %d, %Y"`` and the
        weekday formatted as ``"%A"``.
    """
    asDatetime = datetime.strptime(meetingDate, "%Y%m%d")
    return tuple(asDatetime.strftime(pattern) for pattern in ("%B %d, %Y", "%A"))
78 |
79 |
def main():
    """CGI entry point: print one page of the meeting archive as JSON.

    Reads the ``lastMeeting`` (row offset) and ``orgId`` form fields. If
    either is missing or ``lastMeeting`` is not an integer, the offset falls
    back to 0 and the organization to ``None``. The first page (offset 0)
    requests 3 meetings; later pages request 6.
    """
    passedArgs = cgi.FieldStorage()

    try:
        lastMeeting = int(passedArgs["lastMeeting"].value)
        orgIdentifier = passedArgs["orgId"].value
    except (KeyError, ValueError, TypeError, AttributeError):
        # Missing field, non-numeric offset, or a repeated field (which
        # FieldStorage hands back as a list without ``.value``).
        lastMeeting = 0
        orgIdentifier = None

    meetingTotal = 3 if lastMeeting == 0 else 6

    outputObj = {
        "meetingList": getMeetings(meetingTotal, lastMeeting, orgIdentifier)
    }

    print("Content-Type: application/json\n")
    print(json.dumps(outputObj))
106 |
107 |
108 | if __name__ == "__main__":
109 | main()
110 |
--------------------------------------------------------------------------------
/app-engine-utility-service/meetingDetails.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This is not an officially supported Google product, though support
4 | # will be provided on a best-effort basis.
5 |
6 | # Copyright 2018 Google LLC
7 |
8 | # Licensed under the Apache License, Version 2.0 (the "License"); you
9 | # may not use this file except in compliance with the License.
10 |
11 | # You may obtain a copy of the License at:
12 |
13 | # https://www.apache.org/licenses/LICENSE-2.0
14 |
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 |
21 |
22 | import os
23 | import ujson
24 | import webapp2
25 | import utilities
26 |
27 |
class main(webapp2.RequestHandler):
    """Handler that returns one meetingRegistry row as a JSON object."""

    # Columns read from meetingRegistry. Each name is used both in the
    # select list (in this order) and as a key of the JSON response.
    _FIELDS = (
        "videoName",
        "beenTranscribed",
        "beenTranscoded",
        "videoDownloaded",
        "videoLink",
        "orgIdentifier",
        "prodTranscript",
        "meetingDate",
        "meetingDesc",
        "beenIndexed",
        "youtubeId",
        "meetingId",
        "prodTranscode",
        "urlIdentifier",
    )

    def get(self):
        """Write the row whose ``globalId`` equals the ``gId`` parameter.

        The response always contains every key in ``_FIELDS``; all values
        are ``None`` when no row matches or the lookup fails. Caching is
        disabled through the ``Cache-Control`` and ``Expires`` headers.
        """
        import logging  # local import: not imported at file level

        self.response.headers["Content-Type"] = "application/json"
        self.response.headers.add_header(
            "Cache-Control",
            "no-cache, no-store, must-revalidate, max-age=0"
        )
        self.response.headers.add_header(
            "Expires",
            "0"
        )

        resultObj = dict.fromkeys(self._FIELDS)
        try:
            globalId = self.request.get("gId")
            sqlCmd = (
                "select " + ", ".join(self._FIELDS)
                + " from meetingRegistry where globalId = %s"
            )
            # One-element tuple; "(globalId)" would be the bare string.
            resultList = utilities.dbExecution(sqlCmd, (globalId,))
            rows = resultList[2]
            if rows:
                resultObj = dict(zip(self._FIELDS, rows[0]))
        except Exception:
            # Same all-None answer as before, but the cause is now logged.
            logging.exception("meetingDetails lookup failed")
            resultObj = dict.fromkeys(self._FIELDS)

        self.response.out.write(ujson.dumps(resultObj))
92 |
93 |
94 | app = webapp2.WSGIApplication([
95 | ("/meetingDetails", main)], debug = True
96 | )
97 |
--------------------------------------------------------------------------------
/app-engine-utility-service/runRecognize.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This is not an officially supported Google product, though support
4 | # will be provided on a best-effort basis.
5 |
6 | # Copyright 2018 Google LLC
7 |
8 | # Licensed under the Apache License, Version 2.0 (the "License"); you
9 | # may not use this file except in compliance with the License.
10 |
11 | # You may obtain a copy of the License at:
12 |
13 | # https://www.apache.org/licenses/LICENSE-2.0
14 |
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 |
21 |
22 | import sys
23 | import ujson
24 | import urllib
25 | import traceback
26 | import utilities
27 | from googleapiclient.discovery import build
28 | from google.appengine.ext import vendor
29 | vendor.add("lib")
30 |
31 | import httplib2
32 | from oauth2client.service_account import ServiceAccountCredentials
33 |
34 |
def runCycle(gcsLoc, jobId, globalId):
    """Submit one audio file to the Speech API and record the outcome.

    Starts a ``longrunningrecognize`` operation for the audio at ``gcsLoc``
    and stores the returned operation name on the ``speechJobs`` row
    identified by ``jobId``. If anything in that sequence raises, the row
    is instead marked with the status "longrunning api call failed". Either
    way ``beenProcessed`` is set to 1.

    Args:
        gcsLoc: URI of the audio file, sent as ``audio.uri``.
        jobId: key of the ``speechJobs`` row to update.
        globalId: meeting identifier; only used in the progress output.
    """
    credentialsJson = "__Credential_JSON_File_Name__"

    scopesList = ["https://www.googleapis.com/auth/cloud-platform"]
    credentialsObj = ServiceAccountCredentials.from_json_keyfile_name(
        credentialsJson,
        scopes=scopesList
    )

    payloadObj = {
        "audio": {
            "uri": gcsLoc
        },
        "config": {
            "languageCode": "en-US",
            "encoding": "FLAC",
            "sampleRateHertz": 16000,
            "enableWordTimeOffsets": True,
            "enableAutomaticPunctuation": True,
            "useEnhanced": True,
            "model": "video",
            "metadata": {
                "interaction_type": "DISCUSSION",
                "recording_device_type": "OTHER_INDOOR_DEVICE",
                "originalMediaType": "VIDEO"
            },
            "speechContexts": {
                "phrases": [
                    "Louisville", "Weldona", "signage", "PROSTAC"
                ]
            }
        }
    }

    try:
        httpObj = credentialsObj.authorize(httplib2.Http())
        serviceObj = build(
            serviceName="speech",
            version="v1p1beta1",
            http=httpObj,
            developerKey="__Google_Speech_API_Key__"
        )
        responseObj = serviceObj.speech().longrunningrecognize(body=payloadObj).execute()

        print("job " + str(jobId) + " for global id " + str(globalId))

        apiName = responseObj["name"]
        print("request name " + str(apiName))

        sqlCmd = """update speechJobs set apiName = %s, beenProcessed = %s where jobId = %s"""
        sqlData = [apiName, 1, jobId]
        utilities.dbExecution(sqlCmd, sqlData)
    except Exception:
        # The stored status is generic, so emit the traceback on stderr to
        # keep the actual cause available for diagnosis.
        traceback.print_exc()
        sqlCmd = """update speechJobs set jobStatus = %s, beenProcessed = %s where jobId = %s"""
        sqlData = ["longrunning api call failed", 1, jobId]
        utilities.dbExecution(sqlCmd, sqlData)
        print("longrunning api call failed")
92 |
93 |
def main():
    """Submit the unprocessed speech job with the lowest jobId, if any.

    Selects one ``speechJobs`` row whose ``beenProcessed`` is 0, prints its
    identifiers and hands it to ``runCycle``. Prints "No jobs to run." when
    the query returns no rows.
    """
    pendingQuery = """select globalId, jobId, gcsLoc from speechJobs where beenProcessed = %s order by jobId limit 1"""
    pendingRows = utilities.dbExecution(pendingQuery, [0])[2]

    print("Content-type: text/plain; charset=UTF-8\n\n")

    if not pendingRows:
        print("No jobs to run.")
        return

    nextJob = pendingRows[0]
    # Single quotes are stripped from the stored location before use.
    gcsLoc = nextJob[2].replace("'", "")
    jobId = nextJob[1]
    globalId = nextJob[0]
    print(globalId)
    print(gcsLoc)
    print(jobId)
    runCycle(gcsLoc, jobId, globalId)
111 |
112 |
113 | if __name__ == "__main__":
114 | main()
--------------------------------------------------------------------------------
/app-engine-utility-service/speechJobs.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This is not an officially supported Google product, though support
4 | # will be provided on a best-effort basis.
5 |
6 | # Copyright 2018 Google LLC
7 |
8 | # Licensed under the Apache License, Version 2.0 (the "License"); you
9 | # may not use this file except in compliance with the License.
10 |
11 | # You may obtain a copy of the License at:
12 |
13 | # https://www.apache.org/licenses/LICENSE-2.0
14 |
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 |
21 |
22 | import time
23 | import base64
24 | import logging
25 | import calendar
26 | import utilities
27 |
28 | from google.appengine.ext import vendor
29 | vendor.add("lib")
30 | from gcloud import storage
31 |
32 | bucketName = "__GCS_Storage_Bucket_Name__"
33 |
34 |
def lookupMeeting(globalId):
    """Return ``(prodTranscode, orgIdentifier)`` for the meeting ``globalId``.

    Both values come from the first row returned for that ``globalId`` in
    ``meetingRegistry``.
    """
    queryResp = utilities.dbExecution(
        """select prodTranscode, orgIdentifier from meetingRegistry where globalId = %s""",
        [globalId],
    )
    firstRow = queryResp[2][0]
    return firstRow[0], firstRow[1]
41 |
42 |
def runCycle(globalId, orgIdentifier, prodTranscode, batchId):
    """Queue a ``speechJobs`` row for each FLAC object of a meeting.

    Lists the bucket objects whose names start with
    ``accounts/<org>/enrichments/<globalId>/transcodes/<prodTranscode>`` and
    inserts one unprocessed job (``beenProcessed`` = 0) for every object
    whose name contains ``.flac``.

    Returns:
        The number of jobs inserted.
    """
    bucketObj = storage.Client().get_bucket(bucketName)
    blobPrefix = (
        "accounts/" + orgIdentifier
        + "/enrichments/" + str(globalId)
        + "/transcodes/" + prodTranscode
    )
    insertCmd = """insert into speechJobs (globalId, orgIdentifier, gcsLoc, beenProcessed, batchId) values (%s, %s, %s, %s, %s)"""

    queued = 0
    for blob in bucketObj.list_blobs(prefix=blobPrefix):
        if ".flac" not in blob.name:
            continue
        gcsLoc = "gs://" + blob.bucket.name + "/" + blob.name
        utilities.dbExecution(
            insertCmd,
            [globalId, orgIdentifier, gcsLoc, 0, batchId],
        )
        queued += 1

    return queued
58 |
59 |
def markTranscript(globalId, prodTranscript, batchId):
    """Flag a meeting as transcribed and store its transcript name.

    The stored name is ``<batchId>-<prodTranscript>``.

    Returns:
        Whatever ``utilities.dbExecution`` returns for the update.
    """
    batchedName = str(batchId) + "-" + prodTranscript
    return utilities.dbExecution(
        """update meetingRegistry set beenTranscribed = %s, prodTranscript = %s where globalId = %s""",
        [1, batchedName, globalId],
    )
67 |
68 |
def main():
    """Pull one speech-job message and create Speech API jobs for it.

    Pulls a single message from the ``speech-job-subscription``
    subscription, acknowledges it, then (when the message names a
    ``globalId``) queues one ``speechJobs`` row per FLAC file of that
    meeting and marks the meeting as transcribed. Prints
    "No Messages to Handle" when the pull returns nothing.
    """
    projectName = "__GCP_Project_ID__"
    subName = "speech-job-subscription"

    print("Content-type: text/plain; charset=UTF-8\n\n")

    respObj = utilities.pullMsg(projectName, subName, True)
    if not respObj:
        print("No Messages to Handle")
        return

    receivedMessage = respObj.get("receivedMessages")[0]
    msgObj = receivedMessage.get("message")
    print(".. pubsub message id: " + str(msgObj.get("messageId")))
    msgType = base64.b64decode(str(msgObj.get("data")))
    print(".. message type: " + msgType)
    # Tolerate a message without attributes: indexing used to raise before
    # the ack below, which made the "if globalId" guard unreachable for
    # exactly the case it exists for.
    globalId = (msgObj.get("attributes") or {}).get("globalId")

    # NOTE(review): the message is acknowledged before any work is done, so
    # a failure further down is not redelivered.
    ackId = receivedMessage.get("ackId")
    utilities.ackMsg(projectName, subName, ackId)

    if not globalId:
        return

    epochTime = calendar.timegm(time.gmtime())
    batchId = str(epochTime)
    print("... creating Speech API jobs for meeting: " + str(globalId))

    # lookupMeeting selects the prodTranscode column; the same value is
    # used below as the transcript name.
    prodTranscode, orgIdentifier = lookupMeeting(globalId)
    logging.info(prodTranscode)
    print(".... production trascript is: " + prodTranscode)

    jobCnt = runCycle(globalId, orgIdentifier, prodTranscode, batchId)
    print(".... created " + str(jobCnt) + " Speech API jobs")

    markTranscript(globalId, prodTranscode, batchId)
    print(".... updated meetingRegistry table")
103 |
104 |
105 | if __name__ == '__main__':
106 | main()
--------------------------------------------------------------------------------
/app-engine-utility-service/utilities.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This is not an officially supported Google product, though support
4 | # will be provided on a best-effort basis.
5 |
6 | # Copyright 2018 Google LLC
7 |
8 | # Licensed under the Apache License, Version 2.0 (the "License"); you
9 | # may not use this file except in compliance with the License.
10 |
11 | # You may obtain a copy of the License at:
12 |
13 | # https://www.apache.org/licenses/LICENSE-2.0
14 |
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 |
21 |
22 | import os
23 | import urllib
24 | import base64
25 | import MySQLdb
26 | from google.appengine.api import urlfetch
27 |
28 | from google.appengine.ext import vendor
29 | vendor.add("lib")
30 | from googleapiclient.discovery import build
31 | from oauth2client.client import GoogleCredentials
32 |
33 | MySQLdb.escape_string("'")
34 |
35 |
def issueReq(reqUrl):
    """Fetch ``reqUrl`` with urlfetch and return the response content."""
    return urlfetch.fetch(reqUrl).content
40 |
41 |
def cleanInput(inputVal, inputLen):
    """Stringify a value, cap its length, then strip a few characters.

    The value is first truncated to ``inputLen`` characters; forward
    slashes, backslashes and semicolons are removed afterwards, so the
    result can be shorter than ``inputLen``.
    """
    cleaned = str(inputVal)
    if len(cleaned) > inputLen:
        cleaned = cleaned[:inputLen]
    for unwanted in ("/", "\\", ";"):
        cleaned = cleaned.replace(unwanted, "")
    return cleaned
51 |
52 |
def pubsubObj():
    """Build a Pub/Sub v1 API client from application-default credentials."""
    return build(
        "pubsub",
        "v1",
        credentials=GoogleCredentials.get_application_default(),
    )
58 |
59 |
def publishMsg(projectName, gId, topicName, msgAction):
    """Publish one message to a Pub/Sub topic.

    Args:
        projectName: project that owns the topic.
        gId: meeting identifier, sent as the ``globalId`` attribute
            (converted with ``str``).
        topicName: short topic name, without the ``projects/...`` prefix.
        msgAction: message payload; it is base64-encoded before sending.

    Returns:
        The publish API response.
    """
    # One client is enough; the client used to be built twice per call.
    serviceObj = pubsubObj()
    topicStr = "projects/%s/topics/%s" % (projectName, topicName)

    bodyObj = {
        "messages": [{
            "attributes": {
                "globalId": str(gId)
            },
            "data": base64.b64encode(msgAction)
        }]
    }

    return serviceObj.projects().topics().publish(
        topic=topicStr,
        body=bodyObj
    ).execute()
82 |
83 |
def pullMsg(projectName, subName, returnImmediately):
    """Pull at most one message from a subscription.

    Returns:
        The pull API response, unmodified.
    """
    client = pubsubObj()
    subscriptionPath = "projects/%s/subscriptions/%s" % (projectName, subName)
    pullRequest = client.projects().subscriptions().pull(
        subscription=subscriptionPath,
        body={
            "returnImmediately": returnImmediately,
            "maxMessages": 1,
        },
    )
    return pullRequest.execute()
99 |
100 |
def ackMsg(projectName, subName, ackId):
    """Acknowledge a single pulled message on a subscription."""
    client = pubsubObj()
    subscriptionPath = "projects/%s/subscriptions/%s" % (projectName, subName)
    ackRequest = client.projects().subscriptions().acknowledge(
        subscription=subscriptionPath,
        body={"ackIds": [ackId]},
    )
    ackRequest.execute()
109 |
110 |
def dbExecution(sqlCmd, sqlData):
    """Run one SQL statement against ``prodDb`` and return its outcome.

    A new connection is opened for every call: through the Cloud SQL unix
    socket when ``SERVER_SOFTWARE`` starts with "Google App Engine/",
    otherwise over TCP to the configured host. The statement is committed
    and the connection is closed again before returning.

    Args:
        sqlCmd: SQL text using ``%s`` placeholders.
        sqlData: parameters for the placeholders. When it is empty the
            statement is executed without parameters.

    Returns:
        ``[cmdExecution, numResults, resultRows]``: the value returned by
        ``cursor.execute``, ``cursor.rowcount``, and ``cursor.fetchall()``.
    """
    sqlInstance = "__Cloud_SQL_Instance_Connection_Name__"

    if os.getenv("SERVER_SOFTWARE", "").startswith("Google App Engine/"):
        connection = MySQLdb.connect(
            unix_socket="/cloudsql/" + sqlInstance,
            user="__Cloud_SQL_Username__",
            passwd="__Cloud_SQL_User_Password__",
            db="prodDb")
    else:
        connection = MySQLdb.connect(
            host="__Cloud_SQL_Public_IP_Address__",
            user="__Cloud_SQL_Username__",
            passwd="__Cloud_SQL_User_Password__",
            db="prodDb")

    # try/finally so a failing statement no longer leaks the cursor and the
    # connection.
    try:
        cursor = connection.cursor()
        try:
            if len(sqlData) > 0:
                try:
                    cmdExecution = cursor.execute(sqlCmd, sqlData)
                except Exception:
                    # Best-effort retry kept from the original: treat
                    # sqlData as a single value and wrap it in a list.
                    cmdExecution = cursor.execute(sqlCmd, [sqlData])
            else:
                cmdExecution = cursor.execute(sqlCmd)
            connection.commit()
            numResults = cursor.rowcount
            resultRows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()

    return [cmdExecution, numResults, resultRows]
141 |
142 |
if __name__ == "__main__":
    # This module defines no main(); calling one here raised NameError.
    raise SystemExit(
        "utilities.py is a helper module; import it instead of running it."
    )
--------------------------------------------------------------------------------
/app-engine-utility-service/receiveResults.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This is not an officially supported Google product, though support
4 | # will be provided on a best-effort basis.
5 |
6 | # Copyright 2018 Google LLC
7 |
8 | # Licensed under the Apache License, Version 2.0 (the "License"); you
9 | # may not use this file except in compliance with the License.
10 |
11 | # You may obtain a copy of the License at:
12 |
13 | # https://www.apache.org/licenses/LICENSE-2.0
14 |
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 |
21 |
22 | import ujson
23 | import utilities
24 | from googleapiclient.discovery import build
25 |
26 | from google.appengine.ext import vendor
27 | vendor.add("lib")
28 | import httplib2
29 | from gcloud import storage
30 | from oauth2client.service_account import ServiceAccountCredentials
31 |
32 | bucketName = "__GCS_Storage_Bucket_Name__"
33 |
34 |
def runCycle(jobId, apiName, gcsLoc, globalId, batchId):
    """Check one Speech API operation and export its result when finished.

    Fetches the operation named ``apiName``. When its metadata reports
    ``progressPercent`` of 100, the whole response is written as JSON to
    the storage bucket and the ``speechJobs`` row is marked
    ``respExported = 1``.

    The export path is derived from ``gcsLoc``: single quotes, the
    ``gs://<bucket>/`` prefix and ``transcodes/`` are removed, ``.flac``
    becomes ``.json``, and ``/<globalId>/`` becomes
    ``/<globalId>/transcripts/<batchId>-``.

    Returns:
        A status line: "... job <jobId> finished",
        "... job <percent>% complete", or "... job queued" when no progress
        is reported yet or the export fails.
    """
    import logging  # local import: not imported at file level

    credentialsJson = "__Credential_JSON_File_Name__"

    scopesList = ["https://www.googleapis.com/auth/cloud-platform"]
    credentialsObj = ServiceAccountCredentials.from_json_keyfile_name(
        credentialsJson,
        scopes=scopesList
    )

    httpObj = credentialsObj.authorize(httplib2.Http())
    serviceObj = build(
        serviceName="speech",
        version="v1p1beta1",
        http=httpObj,
        developerKey="__Google_Speech_API_Key__"
    )

    reqObj = serviceObj.operations().get(name=apiName).execute()

    try:
        progressPercent = reqObj["metadata"]["progressPercent"]
    except (KeyError, TypeError):
        # No progress information on the operation; the original code
        # reported this case as "queued".
        return "... job queued"

    if progressPercent != 100:
        return "... job " + str(progressPercent) + "% complete"

    try:
        clientObj = storage.Client()
        bucketObj = clientObj.get_bucket(bucketName)
        cloudPath = gcsLoc.replace("'", "")
        cloudPath = cloudPath.replace(".flac", ".json")
        bucketPrefix = "gs://" + bucketName + "/"
        cloudPath = cloudPath.replace(bucketPrefix, "")
        cloudPath = cloudPath.replace("transcodes/", "")

        globalDir = "/" + str(globalId) + "/"
        transDir = globalDir + "transcripts/" + str(batchId) + "-"
        newPath = cloudPath.replace(globalDir, transDir)

        blobObj = bucketObj.blob(newPath)
        blobObj.upload_from_string(ujson.dumps(reqObj))

        sqlCmd = """update speechJobs set respExported = %s where jobId = %s"""
        sqlData = [1, jobId]
        utilities.dbExecution(sqlCmd, sqlData)
    except Exception:
        # Status line unchanged for callers; the job stays unexported and
        # is picked up again, but the cause is now logged.
        logging.exception("Exporting Speech API response for job %s failed", jobId)
        return "... job queued"

    return "... job " + str(jobId) + " finished"
85 |
def nextAction(globalId):
    """Publish a "create-word-list" message for ``globalId``.

    The message goes to the ``wordlistQueue`` topic; the publish response
    is returned unchanged.
    """
    return utilities.publishMsg(
        "__GCP_Project_ID__",
        globalId,
        "wordlistQueue",
        "create-word-list",
    )
92 |
93 |
def main():
    """Poll up to two submitted speech jobs and export finished results.

    Selects jobs that were processed, not yet exported and carry no
    ``jobStatus``, ordered by ``queueTimestamp``. After each one is checked
    through ``runCycle``, the remaining unexported jobs of the same meeting
    are counted; when none are left, the next pipeline step is triggered
    through ``nextAction``.
    """
    print("Content-type: text/plain; charset=UTF-8\n\n")

    pendingSql = (
        "select jobId, apiName, gcsLoc, globalId, batchId from speechJobs\n"
        "where beenProcessed = %s\n"
        "and respExported = %s\n"
        "and jobStatus is %s\n"
        "order by queueTimestamp asc limit 2"
    )
    remainingSql = (
        "select count(*) from speechJobs\n"
        "where respExported = %s\n"
        "and jobStatus is %s\n"
        "and globalId = %s"
    )

    pendingJobs = utilities.dbExecution(pendingSql, [1, 0, None])[2]

    for jobRow in pendingJobs:
        jobId, apiName, gcsLoc = jobRow[0], jobRow[1], jobRow[2]
        globalId, batchId = jobRow[3], jobRow[4]

        if not jobId:
            continue

        print("global Id: " + str(globalId))
        print("... job " + str(jobId))
        print(runCycle(jobId, apiName, gcsLoc, globalId, batchId))

        countResp = utilities.dbExecution(remainingSql, [0, None, globalId])
        stillQueued = countResp[2][0][0]

        print("... " + str(stillQueued) + " jobs still in the queue")
        if stillQueued == 0:
            print(nextAction(globalId))
        print("")
130 |
131 | if __name__ == "__main__":
132 | main()
133 |
--------------------------------------------------------------------------------
/app-engine-front-end/ep-searchArchive.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This is not an officially supported Google product, though support
4 | # will be provided on a best-effort basis.
5 |
6 | # Copyright 2018 Google LLC
7 |
8 | # Licensed under the Apache License, Version 2.0 (the "License"); you
9 | # may not use this file except in compliance with the License.
10 |
11 | # You may obtain a copy of the License at:
12 |
13 | # https://www.apache.org/licenses/LICENSE-2.0
14 |
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 |
21 |
22 | import os
23 | import cgi
24 | import ujson
25 | import urllib
26 | import logging
27 | from datetime import datetime
28 | from collections import Counter
29 | from google.appengine.ext import vendor
30 | from google.appengine.api import urlfetch
31 |
# Base URL of the archive video search service that main() queries.
# NOTE(review): the value looks like a deployment-time placeholder; confirm it
# is substituted before this handler is deployed.
archive_video_search_service_url = "__Archive_Video_Search_Service_URL__"
33 |
34 |
def mkDate(inputStr):
    """Convert a compact ``YYYYMMDD`` date string into two display formats.

    Args:
        inputStr: Date in ``%Y%m%d`` form, e.g. ``"20180109"``.

    Returns:
        A two-item list ``[dateDesc, date_formatted_for_dateList]``, e.g.
        ``["Jan 9, 2018", "2018-01-09"]``.

    Raises:
        ValueError: If ``inputStr`` does not match ``%Y%m%d``.
    """
    datetimeObj = datetime.strptime(inputStr, "%Y%m%d")
    date_formatted_for_dateList = datetimeObj.strftime("%Y-%m-%d")

    # The unpadded day comes from the datetime itself: the "%-d" directive is
    # a platform-specific strftime extension and is not available everywhere.
    dateDesc = "%s %d, %s" % (
        datetimeObj.strftime("%b"),
        datetimeObj.day,
        datetimeObj.strftime("%Y")
    )

    return [dateDesc, date_formatted_for_dateList]
41 |
42 |
def main():
    """Answer an archive-search CGI request with JSON results and chart data.

    Reads the ``q`` and ``orgId`` request parameters, forwards them to the
    archive video search service and prints no-cache headers followed by a
    JSON document with three keys:

    * ``searchResults``: one entry per meeting bucket of the response;
    * ``chartData``: parallel ``countList`` / ``dateList`` / ``tooltipList``
      lists plus ``pieData``, the match totals grouped by meeting
      description;
    * ``resultsInfo``: the number of meetings and the total match count.

    Raises:
        KeyError: If ``q`` or ``orgId`` is missing from the request.
    """
    passedArgs = cgi.FieldStorage()
    queryString = passedArgs["q"].value
    orgIdentifier = passedArgs["orgId"].value

    logging.info("archive_search")

    payloadObj = {
        "orgId": orgIdentifier,
        "q": queryString
    }
    urlParams = urllib.urlencode(payloadObj, doseq=True)
    reqUrl = archive_video_search_service_url + "/?%s" % urlParams
    searchObj = ujson.loads(urlfetch.fetch(reqUrl).content)

    resultObj = []
    cntList = []
    dateList = []
    tooltipList = []
    result_total = 0
    # Match totals per meeting description.  Summing the counts directly
    # gives the same tallies as expanding one list item per match and
    # counting them afterwards, without the intermediate list.
    descTally = Counter()

    for eachResp in searchObj["aggregations"]["group_by_meeting"]["buckets"]:
        sourceObj = eachResp["meeting_details"]["hits"]["hits"][0]["_source"]
        dateIndex = sourceObj["meetingDate"]
        meetingDesc = sourceObj["meetingDesc"]
        returnCnt = eachResp["doc_count"]

        datetimeObj = datetime.strptime(dateIndex, "%Y%m%d")
        # Long month name with an unpadded day; built from datetime.day
        # because the "%-d" strftime directive is platform-specific.
        meetingDate = "%s %d, %s" % (
            datetimeObj.strftime("%B"),
            datetimeObj.day,
            datetimeObj.strftime("%Y")
        )
        dateDesc, date_formatted_for_dateList = mkDate(dateIndex)

        resultObj.append({
            "urlIdentifier": sourceObj["urlIdentifier"],
            "transcriptMatches": returnCnt,
            "meetingDate": meetingDate,
            "dateIndex": dateIndex,
            "meetingDesc": meetingDesc
        })

        cntList.append(returnCnt)
        dateList.append(date_formatted_for_dateList)
        tooltipList.append(
            "%s - %s - %s results" % (meetingDesc, dateDesc, returnCnt)
        )
        result_total += returnCnt

        # Buckets without matches never contributed to the pie data.
        if returnCnt > 0:
            descTally[meetingDesc] += returnCnt

    chartObj = {
        "countList": cntList,
        "dateList": dateList,
        "tooltipList": tooltipList,
        "pieData": [
            [eachDesc, eachTotal]
            for eachDesc, eachTotal in dict(descTally).items()
        ]
    }

    respObj = {
        "searchResults": resultObj,
        "chartData": chartObj,
        "resultsInfo": {
            "meeting_total": len(resultObj),
            "result_total": result_total
        }
    }

    print("Expires: 0")
    print("Cache-Control: no-cache, no-store, must-revalidate, max-age=0")
    print("Content-Type: application/json\n")
    print(ujson.dumps(respObj))


if __name__ == "__main__":
    main()
145 |
--------------------------------------------------------------------------------
/index-meeting/worker.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This is not an officially supported Google product, though support
4 | # will be provided on a best-effort basis.
5 |
6 | # Copyright 2018 Google LLC
7 |
8 | # Licensed under the Apache License, Version 2.0 (the "License"); you
9 | # may not use this file except in compliance with the License.
10 |
11 | # You may obtain a copy of the License at:
12 |
13 | # https://www.apache.org/licenses/LICENSE-2.0
14 |
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 |
21 |
22 | import os
23 | import cgi
24 | import json
25 | import time
26 | import urllib
27 | import base64
28 | import requests
29 | from google.cloud import pubsub
30 | from google.cloud import storage
31 | from elasticsearch import helpers
32 | from elasticsearch import Elasticsearch
33 | from oauth2client.service_account import ServiceAccountCredentials
34 |
# Service-account key file, resolved against the process's working directory.
service_account_json = "__Credential_JSON_File_Name__"
dirPath = os.path.normpath(os.getcwd())
service_account_path = os.path.join(dirPath, service_account_json)

# Pub/Sub coordinates of the queue this worker consumes.
projectId = "__GCP_Project_ID__"
topicName = "indexQueue"
subName = "index-meeting-subscription"

bucketName = "__GCS_Storage_Bucket_Name__"
utility_service_url = "__Utility_Service_URL__"

psClient = pubsub.SubscriberClient()
topicPath = psClient.topic_path(projectId, topicName)
subPath = psClient.subscription_path(projectId, subName)
subObj = psClient.subscribe(subPath)

# Time offset in seconds added to every indexed segment; runIndexing() resets
# and advances it, processFile() reads it.
cummTime = None

# SECURITY: the Elasticsearch endpoint and credentials below were hard-coded
# in this file.  They can now be supplied through the environment; the old
# literals remain only as fallbacks so existing deployments keep working.
# The embedded password should be rotated and the fallback removed.
searchClient = Elasticsearch(
    [
        os.environ.get(
            "ELASTICSEARCH_URL",
            "https://7b76c3d47a7445119d54a4089ae9e9a3.us-central1.gcp.cloud.es.io:9243/"
        )
    ],
    http_auth=(
        os.environ.get("ELASTICSEARCH_USER", "elastic"),
        os.environ.get("ELASTICSEARCH_PASSWORD", "EbJ3suYnJ5YdTIJRM9i38Swu")
    )
)
71 |
72 |
def psCall(reqUrl, postPayload):
    """POST a JSON payload to a Pub/Sub REST endpoint and return the reply text.

    Each call loads the service-account key file at ``service_account_path``,
    requests an access token for the cloud-platform scope and sends it as a
    bearer ``authorization`` header.
    """
    credentialsObj = ServiceAccountCredentials.from_json_keyfile_name(
        service_account_path,
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )
    tokenStr = credentialsObj.get_access_token().access_token

    responseObj = requests.post(
        reqUrl,
        data=json.dumps(postPayload),
        headers={"authorization": "Bearer %s" % tokenStr}
    )
    return responseObj.text
92 |
93 |
def processFile(jsonObj, globalId, orgIdentifier, meetingDate, meetingDesc, urlIdentifier):
    """Turn one speech-recognition result file into bulk-index actions.

    Every entry of ``jsonObj["response"]["results"]`` that has a non-empty
    ``alternatives`` list contributes one action built from its first
    alternative.  The segment start is the ``startTime`` of its first word
    plus the module-level ``cummTime`` offset; the segment length is the
    rounded span between the first word's start and the last word's end.
    Results whose first alternative has no words are skipped, because no
    timestamp can be derived for them.

    ``cummTime`` must hold a number when a segment is processed;
    ``runIndexing`` sets it before calling this function.

    Args:
        jsonObj: Parsed result file (a dict).
        globalId: Meeting id; stored as ``int(globalId)``.
        orgIdentifier: Used as the target index name.
        meetingDate: Stored unchanged in each action.
        meetingDesc: Stored unchanged in each action.
        urlIdentifier: Stored unchanged in each action.

    Returns:
        A list of action dicts with ``_index``, ``_type`` and ``_source``.
    """
    # Known mis-transcriptions, applied in this order.
    transcriptFixes = (
        ("Lewisville", "Louisville"),
        ("Pro stack", "PROSTAC"),
        ("pro stack", "PROSTAC"),
        ("Pro Stacks", "PROSTAC"),
        ("pro Strat", "PROSTAC"),
        ("pro-sex", "PROSTAC"),
    )

    indexList = []
    for eachAlt in jsonObj.get("response", {}).get("results", []):
        altList = eachAlt.get("alternatives")
        if not altList:
            continue

        topAlt = altList[0]
        wordList = topAlt.get("words")
        if not wordList:
            # No word timings: nothing to anchor the segment to.
            continue

        # Times are read as strings with a trailing "s" (e.g. "12.300s").
        videoTimestamp = float(str(wordList[0]["startTime"]).replace("s", ""))
        endTime = float(str(wordList[-1]["endTime"]).replace("s", ""))

        segLen = round(endTime - videoTimestamp)
        displayTime = videoTimestamp + float(cummTime)

        transcriptStr = topAlt["transcript"]
        for wrongStr, rightStr in transcriptFixes:
            transcriptStr = transcriptStr.replace(wrongStr, rightStr)

        indexList.append({
            "_index": orgIdentifier,
            "_type": "meeting-transcript",
            "_source": {
                "globalId": int(globalId),
                "mediaTimestamp": displayTime,
                "segmentLength": segLen,
                "meetingDesc": meetingDesc,
                "transcriptStr": transcriptStr,
                "meetingDate": meetingDate,
                "urlIdentifier": urlIdentifier
            }
        })

    return indexList
142 |
143 |
def runIndexing(globalId, prodTrans, orgIdentifier, meetingDate, meetingDesc, urlIdentifier):
    """Index every transcript JSON file of a meeting into Elasticsearch.

    Lists the blobs under
    ``accounts/<org>/enrichments/<globalId>/transcripts/<prodTrans>/`` whose
    name contains ".json", processes them in sorted name order with
    ``processFile`` and bulk-indexes the resulting actions.  The module-level
    ``cummTime`` offset starts at 0 and grows by 10800 seconds after each
    file.

    Returns:
        A tuple ``(cloudPath, fileList, fileCnt, indexLen)``.  ``indexLen``
        is the number of actions built from the last file processed, or 0
        when no transcript file was found.
    """
    global cummTime

    # Offset added after each file.
    # NOTE(review): 10800 s = 180 min, presumably the length of one audio
    # segment produced by the transcode step; confirm before changing either.
    segmentOffsetSeconds = 10800

    cloudPath = "accounts/" + orgIdentifier + "/enrichments/" + str(globalId) + "/transcripts/" + str(prodTrans) + "/"

    clientObj = storage.Client.from_service_account_json(service_account_path)
    bucketObj = clientObj.get_bucket(bucketName)
    transcriptList = [
        eachEntry.name
        for eachEntry in bucketObj.list_blobs(prefix=cloudPath)
        if ".json" in eachEntry.name
    ]

    cummTime = 0
    fileList = []
    # Defined up front so the return below also works when nothing matched.
    indexList = []

    for eachFile in sorted(transcriptList):
        fileList.append(eachFile)

        blobStr = bucketObj.get_blob(eachFile).download_as_string()
        indexList = processFile(
            json.loads(blobStr),
            globalId,
            orgIdentifier,
            meetingDate,
            meetingDesc,
            urlIdentifier
        )
        helpers.bulk(searchClient, indexList)

        cummTime = float(cummTime) + segmentOffsetSeconds

    return cloudPath, fileList, len(fileList), len(indexList)
184 |
185 |
def dispatchWorker(ackId, globalId):
    """Index one meeting (unless already indexed) and acknowledge its message.

    Fetches the meeting details from the utility service.  When
    ``beenIndexed`` is 0 the index marker is toggled first and the
    transcripts are then indexed with ``runIndexing``; otherwise the meeting
    is reported as already indexed.  In both cases the Pub/Sub message is
    acknowledged afterwards.

    Any exception is caught and reported on stdout as ``skip <error>``; the
    message is then left unacknowledged.

    Args:
        ackId: Pub/Sub ack id of the message being handled.
        globalId: Meeting id taken from the message attributes.
    """
    try:
        reqUrl = utility_service_url + "/meetingDetails?gId=%s" % globalId
        jsonObj = json.loads(requests.get(reqUrl).text)

        prodTrans = jsonObj["prodTranscript"]
        orgIdentifier = jsonObj["orgIdentifier"]
        meetingDate = jsonObj["meetingDate"]
        meetingDesc = jsonObj["meetingDesc"]
        beenIndexed = jsonObj["beenIndexed"]
        urlIdentifier = jsonObj["urlIdentifier"]

        if beenIndexed == 0:
            toggleResp = toggleIndex(globalId)
            print("... index marker has been updated " + str(toggleResp))
            cloudPath, fileList, fileCnt, indexLen = runIndexing(
                globalId,
                prodTrans,
                orgIdentifier,
                meetingDate,
                meetingDesc,
                urlIdentifier
            )

            print(globalId)
            print("")
            print(cloudPath)
            print("")
            print(fileList)
            print("")

            print(str(fileCnt) + " files processed")
        else:
            print("... entry has already been indexed")

        postPayload = {
            "ackIds": [ackId]
        }
        subStr = "projects/%s/subscriptions/%s" % (projectId, subName)
        reqUrl = "https://pubsub.googleapis.com/v1/%s:acknowledge" % subStr
        psCall(reqUrl, postPayload)
        print("... Pubsub message acknowledged")
    except Exception as e:
        # str(e) always yields text; e.message is empty for exceptions raised
        # with several arguments and is not guaranteed to be a string.
        print("skip " + str(e))
232 |
233 |
def toggleIndex(globalId):
    """Call the utility service's ``/toggleIndex`` endpoint for a meeting.

    Returns the response body as text.
    """
    return requests.get(
        utility_service_url + "/toggleIndex?gId=%s" % globalId
    ).text
240 |
241 |
def nextAction(globalId):
    """Ask the utility service to publish the "publish-transcript" message.

    Sends ``globalId`` together with the target topic
    "publish-transcript-queue" to the ``/msgPublish`` endpoint and returns
    the response body as text.
    """
    responseObj = requests.get(
        utility_service_url + "/msgPublish",
        params={
            "msgAction": "publish-transcript",
            "topicName": "publish-transcript-queue",
            "gId": globalId
        }
    )
    return responseObj.text
256 |
257 |
def main():
    """Poll the index subscription forever, handling one message per pull.

    Every four seconds a single message is pulled through the Pub/Sub REST
    API.  When one is received its type, ack id and meeting id are printed,
    ``dispatchWorker`` is run and the result of ``nextAction`` is printed.
    An empty pull is ignored silently; any other failure while handling a
    message is reported on stdout and the loop continues.
    """
    postPayload = {
        "returnImmediately": True,
        "maxMessages": 1
    }
    subStr = "projects/%s/subscriptions/%s" % (projectId, subName)
    reqUrl = "https://pubsub.googleapis.com/v1/%s:pull" % subStr

    while True:
        psMsg = psCall(reqUrl, postPayload)
        try:
            receivedList = json.loads(psMsg).get("receivedMessages")
            # No "receivedMessages" key (or an empty list) means the queue
            # had nothing for us on this pull.
            if receivedList:
                firstMsg = receivedList[0]
                msgType = base64.b64decode(firstMsg["message"]["data"])
                print("Message Received. Type = '%s'" % str(msgType))
                ackId = firstMsg["ackId"]
                globalId = firstMsg["message"]["attributes"]["globalId"]
                print(ackId)
                print(globalId)
                dispatchWorker(ackId, globalId)
                print(nextAction(globalId))
        except Exception as e:
            # Keep polling, but do not hide the failure.  KeyboardInterrupt
            # and SystemExit are not caught here, so the worker can be
            # stopped while a message is being handled.
            print("... message could not be processed: " + str(e))
        time.sleep(4)


if __name__ == "__main__":
    main()
285 |
--------------------------------------------------------------------------------
/create-word-list/worker.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This is not an officially supported Google product, though support
4 | # will be provided on a best-effort basis.
5 |
6 | # Copyright 2018 Google LLC
7 |
8 | # Licensed under the Apache License, Version 2.0 (the "License"); you
9 | # may not use this file except in compliance with the License.
10 |
11 | # You may obtain a copy of the License at:
12 |
13 | # https://www.apache.org/licenses/LICENSE-2.0
14 |
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 |
21 |
22 | import os
23 | import json
24 | import time
25 | import base64
26 | import requests
27 | import httplib2
28 | from google.cloud import pubsub
29 | from google.cloud import storage
30 | from oauth2client.service_account import ServiceAccountCredentials
31 |
# Service-account key file and its path under the process's working directory.
service_account_json = "__Credential_JSON_File_Name__"
dirPath = os.path.normpath(os.getcwd())
service_account_path = os.path.join(dirPath, service_account_json)

# Pub/Sub coordinates of the queue this worker consumes.
projectId = "__GCP_Project_ID__"
topicName = "wordlistQueue"
subName = "word-list-creation-subscription"

bucketName = "__GCS_Storage_Bucket_Name__"
utility_service_url = "__Utility_Service_URL__"

psClient = pubsub.SubscriberClient()
topicPath = psClient.topic_path(projectId, topicName)
subPath = psClient.subscription_path(projectId, subName)
subObj = psClient.subscribe(subPath)
58 |
59 |
def psCall(reqUrl, postPayload):
    """POST a JSON payload to a Pub/Sub REST endpoint and return the reply text.

    Each call loads the key file named by ``service_account_json``, requests
    an access token for the cloud-platform scope and sends it as a bearer
    ``authorization`` header.
    """
    credentialsObj = ServiceAccountCredentials.from_json_keyfile_name(
        service_account_json,
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )
    tokenStr = credentialsObj.get_access_token().access_token

    responseObj = requests.post(
        reqUrl,
        data=json.dumps(postPayload),
        headers={"authorization": "Bearer %s" % tokenStr}
    )
    return responseObj.text
79 |
80 |
def acknowledgeMsg(ackId):
    """Acknowledge one Pub/Sub message on this worker's subscription.

    The service's reply is discarded; a fixed status line is returned for
    logging.
    """
    subStr = "projects/%s/subscriptions/%s" % (projectId, subName)
    psCall(
        "https://pubsub.googleapis.com/v1/%s:acknowledge" % subStr,
        {"ackIds": [ackId]}
    )
    return "... Pubsub message acknowledged"
90 |
91 |
def get_api_results(globalId, orgIdentifier, prodTranscript):
    """List the transcript JSON blobs stored for one meeting transcript.

    Looks under
    ``accounts/<org>/enrichments/<globalId>/transcripts/<prodTranscript>/``
    and returns the names (as ``str``) of all blobs whose name contains
    ".json".
    """
    cloudPath = "".join([
        "accounts/", orgIdentifier,
        "/enrichments/", str(globalId),
        "/transcripts/", str(prodTranscript), "/"
    ])

    clientObj = storage.Client.from_service_account_json(service_account_path)
    bucketObj = clientObj.get_bucket(bucketName)

    return [
        str(eachEntry.name)
        for eachEntry in bucketObj.list_blobs(prefix=cloudPath)
        if ".json" in eachEntry.name
    ]
105 |
106 |
def results_files_to_string(transcriptList, exportType):
    """Concatenate the transcripts of several result files into one string.

    The files are downloaded in sorted name order.  For every result with a
    non-empty ``alternatives`` list the first alternative is used:

    * ``exportType == "list"``: each word is lower-cased, stripped of
      ``. , ? !`` and emitted on its own line;
    * ``exportType == "long"``: each transcript is appended, preceded by a
      single space.

    Any other ``exportType`` produces an empty string.  A fixed set of known
    mis-transcriptions is corrected in the final text.

    Args:
        transcriptList: Blob names of the result files.
        exportType: ``"list"`` or ``"long"``.

    Returns:
        A tuple ``(masterStr, fileCnt)`` with the combined text and the
        number of files read.
    """
    clientObj = storage.Client.from_service_account_json(service_account_path)
    bucketObj = clientObj.get_bucket(bucketName)

    fileCnt = 0
    # Pieces are collected and joined once instead of growing the string
    # with repeated concatenation.
    pieceList = []

    for eachFile in sorted(transcriptList):
        fileCnt += 1

        blobStr = bucketObj.get_blob(eachFile).download_as_string()
        jsonObj = json.loads(blobStr)

        for eachAlt in jsonObj.get("response", {}).get("results", []):
            if not eachAlt.get("alternatives"):
                continue
            topAlt = eachAlt["alternatives"][0]

            # Equality, not identity: "is" only matched here when the
            # interpreter happened to share the string objects.
            if exportType == "list":
                for eachWord in topAlt["words"]:
                    wordStr = eachWord["word"].lower()
                    for markStr in (".", ",", "?", "!"):
                        wordStr = wordStr.replace(markStr, "")
                    pieceList.append(wordStr + "\n")
            elif exportType == "long":
                pieceList.append(" " + topAlt["transcript"])

    masterStr = "".join(pieceList)

    # Known mis-transcriptions, applied in this order.
    for wrongStr, rightStr in (
        ("lewisville", "louisville"),
        ("Pro stack", "PROSTAC"),
        ("pro stack", "PROSTAC"),
        ("Pro Stacks", "PROSTAC"),
        ("pro Strat", "PROSTAC"),
        ("pro-sex", "PROSTAC"),
    ):
        masterStr = masterStr.replace(wrongStr, rightStr)

    return masterStr, fileCnt
147 |
148 |
def write_string_to_gcs(globalId, orgIdentifier, prodTranscript, exportType, masterStr):
    """Upload the combined transcript text to the storage bucket.

    The stripped text is written to
    ``accounts/<org>/enrichments/<globalId>/transcripts/``
    ``rawTxt-<globalId>-<prodTranscript>-<exportType>.txt``.
    Returns a fixed status line for logging.
    """
    newPath = "".join([
        "accounts/", orgIdentifier,
        "/enrichments/", str(globalId),
        "/transcripts/",
        "rawTxt-", str(globalId), "-", str(prodTranscript), "-", exportType, ".txt"
    ])

    bucketObj = storage.Client.from_service_account_json(
        service_account_path
    ).get_bucket(bucketName)
    bucketObj.blob(newPath).upload_from_string(masterStr.strip())

    return "... file created in GCS"
160 |
161 |
def lookupMeeting(globalId):
    """Fetch a meeting's details from the utility service.

    Returns:
        A tuple ``(prodTranscript, orgIdentifier)`` taken from the JSON
        reply of the ``/meetingDetails`` endpoint.
    """
    responseObj = requests.get(
        utility_service_url + "/meetingDetails",
        params={"gId": globalId}
    )
    detailsObj = json.loads(responseObj.text)

    return detailsObj["prodTranscript"], detailsObj["orgIdentifier"]
172 |
173 |
def nextAction(globalId):
    """Ask the utility service to publish the "create-wordcloud" message.

    Sends ``globalId`` and the target topic "wordcloudQueue" to the
    ``/msgPublish`` endpoint and returns a status line containing the reply
    with all carriage returns and newlines removed.
    """
    responseObj = requests.get(
        utility_service_url + "/msgPublish",
        params={
            "msgAction": "create-wordcloud",
            "topicName": "wordcloudQueue",
            "gId": globalId
        }
    )
    flatTxt = responseObj.text.replace("\r", "").replace("\n", "")

    return ".. next action initiated: " + str(flatTxt)
190 |
191 |
def issue_transcript_error(globalId):
    """Call the utility service's ``/toggleTranscriptErr`` endpoint.

    Returns the response body as text.
    """
    return requests.get(
        utility_service_url + "/toggleTranscriptErr",
        params={"gId": globalId}
    ).text
201 |
202 |
def dispatchWorker(ackId, globalId):
    """Build and store the word list for one meeting, then ack its message.

    The meeting's transcript files are combined in "list" mode and written
    to the bucket.  If no file was read or the combined text is empty, a
    transcript error is issued through the utility service instead.  The
    Pub/Sub message is acknowledged in every case, including after an
    exception.

    Args:
        ackId: Pub/Sub ack id of the message being handled.
        globalId: Meeting id taken from the message attributes.

    Returns:
        ``True`` when the word list was written, ``False`` otherwise.
    """
    successFlag = None
    try:
        print(".. creating word list for meeting: " + str(globalId))
        prodTranscript, orgIdentifier = lookupMeeting(globalId)
        print("... word list will be created from transcript: " + str(prodTranscript))
        transcriptList = get_api_results(globalId, orgIdentifier, prodTranscript)
        print("... " + str(len(transcriptList)) + " files will be processed")
        masterStr, fileCnt = results_files_to_string(transcriptList, "list")

        print("... " + str(fileCnt) + " files processed")
        print(".... the masterStr is " + str(len(masterStr)) + " characters in length")
        if len(masterStr) > 0 and fileCnt > 0:
            print(write_string_to_gcs(globalId, orgIdentifier, prodTranscript, "list", masterStr))
            successFlag = True
        else:
            print(".... something isn't right so issuing a transcriptErr")
            toggleResp = issue_transcript_error(globalId)
            print(".... " + str(toggleResp))
            successFlag = False

        print(acknowledgeMsg(ackId))
    except Exception as e:
        print("something went wrong")
        print(acknowledgeMsg(ackId))
        # str(e) always yields text; e.message is empty for exceptions raised
        # with several arguments and is not guaranteed to be a string.
        print("skip " + str(e))
        successFlag = False

    return successFlag
232 |
233 |
def main():
    """Poll the word-list subscription forever, handling one message per pull.

    Every four seconds a single message is pulled through the Pub/Sub REST
    API.  When one is received ``dispatchWorker`` is run and, if it reports
    success, the result of ``nextAction`` is printed.  An empty pull is
    ignored silently; any other failure while handling a message is
    reported on stdout and the loop continues.
    """
    postPayload = {
        "returnImmediately": True,
        "maxMessages": 1
    }
    subStr = "projects/%s/subscriptions/%s" % (projectId, subName)
    reqUrl = "https://pubsub.googleapis.com/v1/%s:pull" % subStr

    while True:
        psMsg = psCall(reqUrl, postPayload)
        try:
            receivedList = json.loads(psMsg).get("receivedMessages")
            # No "receivedMessages" key (or an empty list) means the queue
            # had nothing for us on this pull.
            if receivedList:
                firstMsg = receivedList[0]
                msgType = base64.b64decode(firstMsg["message"]["data"])
                ackId = firstMsg["ackId"]
                globalId = firstMsg["message"]["attributes"]["globalId"]
                successFlag = dispatchWorker(ackId, globalId)
                if successFlag == True:
                    print(nextAction(globalId))
                else:
                    print(".. not initiating next action")
                print("")
        except Exception as e:
            # Keep polling, but do not hide the failure.  KeyboardInterrupt
            # and SystemExit are not caught here, so the worker can be
            # stopped while a message is being handled.
            print("... message could not be processed: " + str(e))
        time.sleep(4)


if __name__ == "__main__":
    main()
262 |
--------------------------------------------------------------------------------
/transcode-video-to-audio/worker.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This is not an officially supported Google product, though support
4 | # will be provided on a best-effort basis.
5 |
6 | # Copyright 2018 Google LLC
7 |
8 | # Licensed under the Apache License, Version 2.0 (the "License"); you
9 | # may not use this file except in compliance with the License.
10 |
11 | # You may obtain a copy of the License at:
12 |
13 | # https://www.apache.org/licenses/LICENSE-2.0
14 |
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 |
21 |
22 | import os
23 | import re
24 | import sys
25 | import json
26 | import time
27 | import base64
28 | import shutil
29 | import calendar
30 | import requests
31 | import subprocess
32 | from google.cloud import storage
33 | from time import gmtime, strftime
34 | from oauth2client.service_account import ServiceAccountCredentials
35 |
# Working directory of the process; transcode() creates its per-meeting
# scratch folder beneath it.
dirPath = os.path.normpath(os.getcwd())
credentialsJson = "__Credential_JSON_File_Name__"

# Pub/Sub coordinates of the queue this worker consumes.
projectId = "__GCP_Project_ID__"
topicName = "transcodeQueue"
subName = "media-transcode-subscription"

bucketName = "__GCS_Storage_Bucket_Name__"
utility_service_url = "__Utility_Service_URL__"

# Duration of each audio segment cut by ffmpeg, in minutes and in seconds.
segment_length_minutes = 180
segment_length_seconds = 60 * segment_length_minutes
48 |
49 |
def psCall(reqUrl, postPayload):
    """POST a JSON payload to a Pub/Sub REST endpoint and return the reply text.

    Each call loads the key file named by ``credentialsJson``, requests an
    access token for the cloud-platform scope and sends it as a bearer
    ``authorization`` header.
    """
    credentialsObj = ServiceAccountCredentials.from_json_keyfile_name(
        credentialsJson,
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )
    tokenStr = credentialsObj.get_access_token().access_token

    responseObj = requests.post(
        reqUrl,
        data=json.dumps(postPayload),
        headers={"authorization": "Bearer %s" % tokenStr}
    )
    return responseObj.text
69 |
70 |
def modifyDeadline(ackId, timeWindow):
    """Set the ack deadline of a pulled message to ``timeWindow`` seconds.

    Logs the extension and returns the text of the Pub/Sub
    ``modifyAckDeadline`` reply.
    """
    subStr = "projects/%s/subscriptions/%s" % (projectId, subName)
    deadlineUrl = "https://pubsub.googleapis.com/v1/%s:modifyAckDeadline" % subStr
    print(".... extending acknowledgement deadline by " + str(timeWindow) + " seconds")

    return psCall(
        deadlineUrl,
        {
            "ackIds": [ackId],
            "ackDeadlineSeconds": timeWindow
        }
    )
81 |
82 |
def downloadFile(fileName, filePath, orgIdentifier):
    """Download a source video from the bucket to a local file.

    Args:
        fileName: Name of the blob under ``accounts/<org>/video/``.
        filePath: Local path the media is written to.
        orgIdentifier: Organisation folder inside the bucket.
    """
    clientObj = storage.Client()
    bucketObj = clientObj.get_bucket(bucketName)
    cloudPath = "accounts/%s/video/" % orgIdentifier
    cloudPath = cloudPath + fileName
    print(".... defining cloud path for file")
    print("..... " + cloudPath)
    blobObj = bucketObj.blob(cloudPath)
    print(".... downloading source file locally")
    print("..... " + filePath)
    # Binary mode: the payload is media data, which a text-mode handle can
    # corrupt (newline translation) or reject outright.
    with open(filePath, "wb") as fileObj:
        blobObj.download_to_file(fileObj)

    return
97 |
98 |
def uploadFiles(globalId, segmentsFlac, ackId, orgIdentifier, tsDir):
    """Upload every file of the local FLAC directory to the bucket.

    Files are stored under
    ``accounts/<org>/enrichments/<globalId>/transcodes/<tsDir>/``.  The ack
    deadline of the message is extended by 120 seconds after each upload.

    Args:
        globalId: Meeting id.
        segmentsFlac: Local directory holding the FLAC segments.
        ackId: Pub/Sub ack id whose deadline is extended.
        orgIdentifier: Organisation folder inside the bucket.
        tsDir: Name of the transcode run's folder.
    """
    clientObj = storage.Client()
    # Use the configured bucket, as downloadFile() does, rather than a
    # hard-coded bucket name.
    bucketObj = clientObj.get_bucket(bucketName)
    flacList = os.listdir(segmentsFlac)
    print("... preparing to upload %s files" % str(len(flacList)))
    for eachFile in flacList:
        print(".... " + eachFile)
        cloudPath = "accounts/%s/enrichments/%s/transcodes/%s/%s" % (
            orgIdentifier,
            str(globalId),
            tsDir,
            eachFile
        )
        blobObj = bucketObj.blob(cloudPath)
        blobObj.upload_from_filename(os.path.join(segmentsFlac, eachFile))
        modifyDeadline(ackId, 120)

    return
115 |
116 |
def nameSegments(videoName, segmentsWav):
    """Build the ffmpeg output pattern for a video's WAV segments.

    The pattern is ``<UTC timestamp>-<stem>-%04d.wav`` inside
    ``segmentsWav``, where the stem is the lower-cased part of ``videoName``
    before its first period with every non-word character removed.  The
    literal ``%04d`` is left in place for ffmpeg to fill in.
    """
    stampStr = strftime("%Y%m%d-%H%M%S", gmtime())

    # Everything before the first "." of the lower-cased name.
    stemStr = videoName.lower().split(".", 1)[0]
    stemStr = re.sub(r'[^\w]', "", stemStr)

    return os.path.join(
        segmentsWav,
        stampStr + "-" + stemStr + "-" + "%04d.wav"
    )
130 |
131 |
def transcode(globalId, videoName, ackId, orgIdentifier, tsDir):
    """Convert a meeting video into FLAC audio segments and upload them.

    Steps, all inside a per-meeting working directory under ``dirPath``:

    1. download the source media with ``downloadFile``;
    2. run ffmpeg to cut it into mono 16 kHz WAV segments of
       ``segment_length_seconds`` each;
    3. convert every WAV segment to FLAC;
    4. upload the FLAC files with ``uploadFiles``.

    The message's ack deadline is extended along the way.  The working
    directory is removed afterwards, also when a step fails.  On success
    the Pub/Sub message is acknowledged.

    Args:
        globalId: Meeting id; also the name of the working directory.
        videoName: File name of the source media.
        ackId: Pub/Sub ack id of the message being handled.
        orgIdentifier: Organisation folder inside the bucket.
        tsDir: Name of the transcode run's folder in the bucket.
    """
    workingDir = os.path.join(dirPath, str(globalId))
    if not os.path.exists(workingDir):
        os.makedirs(workingDir)
    filePath = os.path.join(workingDir, videoName)

    try:
        print("... preparing to download media")
        print(".... " + videoName)
        downloadFile(videoName, filePath, orgIdentifier)
        print(".... media downloaded")

        print("... creating WAV directory locally")
        segmentsWav = os.path.join(workingDir, "segmentsWav")
        print(".... " + segmentsWav)
        if os.path.exists(segmentsWav):
            shutil.rmtree(segmentsWav)
        os.makedirs(segmentsWav)

        print("... creating FLAC directory locally")
        segmentsFlac = os.path.join(workingDir, "segmentsFlac")
        print(".... " + segmentsFlac)
        if os.path.exists(segmentsFlac):
            shutil.rmtree(segmentsFlac)
        os.makedirs(segmentsFlac)

        print("... defining segments path")
        segmentPath = nameSegments(videoName, segmentsWav)
        print(".... " + segmentPath)

        modifyDeadline(ackId, 120)

        print("... converting to WAV format")

        print(".... source file is in " + filePath[-4:] + " format")

        # Arguments are passed as a list, without a shell, so that spaces or
        # shell metacharacters in the file name cannot split or alter the
        # command.
        segmentCmd = [
            "ffmpeg", "-loglevel", "error",
            "-i", filePath,
            "-f", "segment",
            "-segment_time", str(segment_length_seconds),
            "-reset_timestamps", "1",
            "-ac", "1",
            "-ar", "16000",
            segmentPath
        ]
        subprocess.call(segmentCmd)

        modifyDeadline(ackId, 120)

        wavList = os.listdir(segmentsWav)
        print(".... preparing to convert " + str(len(wavList)) + " files")
        for eachFile in wavList:
            wavPath = os.path.join(segmentsWav, eachFile)
            if not os.path.isfile(wavPath) or wavPath[-4:] != ".wav":
                continue

            # Same file name, with the ".wav" suffix swapped for ".flac".
            flacPath = os.path.join(segmentsFlac, eachFile[:-4] + ".flac")

            print("..... running ffmpeg - converting to FLAC format")
            subprocess.call(
                ["ffmpeg", "-loglevel", "error", "-i", wavPath, flacPath]
            )

            modifyDeadline(ackId, 120)

        uploadFiles(globalId, segmentsFlac, ackId, orgIdentifier, tsDir)
    finally:
        # Clean up, whether or not the steps above succeeded.
        if os.path.exists(workingDir):
            shutil.rmtree(workingDir)

    print("... process complete")
    print(acknowledgeMsg(ackId))

    return
209 |
210 |
def acknowledgeMsg(ackId):
    """Acknowledge one Pub/Sub message on this worker's subscription.

    The service's reply is discarded; a fixed status line is returned for
    logging.
    """
    subStr = "projects/%s/subscriptions/%s" % (projectId, subName)
    psCall(
        "https://pubsub.googleapis.com/v1/%s:acknowledge" % subStr,
        {"ackIds": [ackId]}
    )
    return "... pubsub message acknowledged\n\n"
220 |
221 |
def lookupName(globalId):
    """Fetch a meeting's details from the utility service.

    Returns:
        A tuple ``(videoName, beenTranscoded, orgIdentifier)`` taken from
        the JSON reply of the ``/meetingDetails`` endpoint.
    """
    responseObj = requests.get(
        utility_service_url + "/meetingDetails?gId=%s" % globalId
    )
    detailsObj = json.loads(responseObj.text)

    return (
        detailsObj["videoName"],
        detailsObj["beenTranscoded"],
        detailsObj["orgIdentifier"]
    )
232 |
233 |
def toggleTranscode(globalId):
    """Call the utility service's ``/toggleTranscode`` endpoint for a meeting.

    Returns the response body as text.
    """
    return requests.get(
        utility_service_url + "/toggleTranscode?gId=%s" % globalId
    ).text
240 |
241 |
def assignId(globalId, prodTranscode):
    """Send a meeting's transcode identifier to the utility service.

    Passes ``globalId`` and ``prodTranscode`` to the ``/idTranscode``
    endpoint and returns the response body as text.
    """
    return requests.get(
        utility_service_url + "/idTranscode",
        params={
            "gId": globalId,
            "transcode": prodTranscode
        }
    ).text
252 |
253 |
def dispatchWorker(ackId, globalId):
    """Transcode one meeting's video unless that has already been done.

    When the meeting's ``beenTranscoded`` marker is not 0 the message is
    acknowledged and nothing else happens.  Otherwise the marker is toggled
    and, if a video name is known, the video is transcoded into a folder
    named ``<epoch seconds>-<segment minutes>-mins`` whose name is then sent
    to the utility service with ``assignId``.
    """
    videoName, beenTranscoded, orgIdentifier = lookupName(globalId)
    print("... beenTranscoded: " + str(beenTranscoded))

    if beenTranscoded != 0:
        print("... entry has already been transcoded")
        print(acknowledgeMsg(ackId))
        return

    toggleResp = toggleTranscode(globalId)
    print("... transcode marker has been updated" + str(toggleResp))

    if videoName is None:
        print("... there is no video name")
        return

    epochTime = calendar.timegm(time.gmtime())
    tsDir = str(epochTime) + "-" + str(segment_length_minutes) + "-mins"
    transcode(globalId, videoName, ackId, orgIdentifier, tsDir)
    assignId(globalId, tsDir)
271 |
272 |
def nextAction(globalId):
    """Ask the utility service to publish the "speechJob" message.

    Sends ``globalId`` and the target topic "speechQueue" to the
    ``/msgPublish`` endpoint and returns the response body as text.
    """
    responseObj = requests.get(
        utility_service_url + "/msgPublish",
        params={
            "msgAction": "speechJob",
            "topicName": "speechQueue",
            "gId": globalId
        }
    )
    return responseObj.text
287 |
288 |
def _parsePullResponse(psMsg):
    """Pick the first message out of a Pub/Sub ``:pull`` response body.

    Args:
        psMsg: Response body text returned by the pull request.

    Returns:
        A ``(msgType, ackId, globalId)`` tuple, or ``None`` when the
        response carries no messages.

    Raises:
        RuntimeError: The response is an API error object. Raising lets the
            caller report it instead of mistaking it for an empty queue.
    """
    responseObj = json.loads(psMsg)
    if "error" in responseObj:
        raise RuntimeError("Pub/Sub pull failed: " + json.dumps(responseObj["error"]))
    receivedList = responseObj.get("receivedMessages")
    if not receivedList:
        return None
    firstEntry = receivedList[0]
    messageObj = firstEntry["message"]
    msgType = base64.b64decode(messageObj["data"])
    return msgType, firstEntry["ackId"], messageObj["attributes"]["globalId"]


def main():
    """Poll the subscription forever, handling one message per iteration.

    Each iteration pulls at most one message, hands it to dispatchWorker and
    then publishes the follow-up action. A failure in an iteration is
    printed and the loop carries on after the usual pause; an empty queue is
    not treated as an error. KeyboardInterrupt and SystemExit are not
    intercepted, so the worker can still be stopped.
    """
    postPayload = {
        "returnImmediately": True,
        "maxMessages": 1
    }
    subStr = "projects/%s/subscriptions/%s" % (projectId, subName)
    reqUrl = "https://pubsub.googleapis.com/v1/%s:pull" % subStr
    while True:
        try:
            parsedMsg = _parsePullResponse(psCall(reqUrl, postPayload))
            if parsedMsg is not None:
                msgType, ackId, globalId = parsedMsg
                print("Message Received. Type = '%s'" % str(msgType))
                print(ackId)
                print(globalId)
                dispatchWorker(ackId, globalId)
                print(nextAction(globalId))
        except Exception as e:
            # Keep the worker alive, but make the failure visible.
            print("... worker iteration failed: " + repr(e))
        time.sleep(10)
311 |
312 |
# Start the polling loop only when this file is executed as a script.
if __name__ == '__main__':
    main()
315 |
316 |
317 |
--------------------------------------------------------------------------------
/generate-wordcloud/worker.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This is not an officially supported Google product, though support
4 | # will be provided on a best-effort basis.
5 |
6 | # Copyright 2018 Google LLC
7 |
8 | # Licensed under the Apache License, Version 2.0 (the "License"); you
9 | # may not use this file except in compliance with the License.
10 |
11 | # You may obtain a copy of the License at:
12 |
13 | # https://www.apache.org/licenses/LICENSE-2.0
14 |
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 |
21 |
22 | import os
23 | import time
24 | import json
25 | import base64
26 | import random
27 | import requests
28 | import calendar
29 | from scipy.misc import imsave
30 | from scipy.misc import imresize
31 | from google.cloud import pubsub
32 | from collections import Counter
33 | from google.cloud import storage
34 | from nltk.corpus import stopwords
35 | from wordcloud import WordCloud, ImageColorGenerator
36 | from matplotlib.colors import LinearSegmentedColormap
37 | from oauth2client.service_account import ServiceAccountCredentials
38 |
# Service-account key file, resolved against the directory the worker is
# started from. The "__...__" values are placeholders (presumably filled in
# at deployment time).
service_account_json = "__Credential_JSON_File_Name__"
dirPath = os.path.normpath(os.getcwd())
service_account_path = os.path.join(dirPath, service_account_json)

# Pub/Sub project, topic and subscription this worker listens on.
projectId = "__GCP_Project_ID__"
topicName = "wordcloudQueue"
subName = "wordcloud-creation-subscription"

# Cloud Storage bucket and base URL of the utility service.
bucketName = "__GCS_Storage_Bucket_Name__"
utility_service_url = "__Utility_Service_URL__"

# Subscriber client plus the fully qualified topic / subscription paths.
psClient = pubsub.SubscriberClient()
topicPath = psClient.topic_path(projectId, topicName)
subPath = psClient.subscription_path(projectId, subName)
subObj = psClient.subscribe(subPath)
67 |
68 |
def psCall(reqUrl, postPayload):
    """POST a JSON payload to a URL using a service-account bearer token.

    The credentials are loaded from the ``service_account_json`` key file
    with the cloud-platform scope, and a token is requested on every call.

    Args:
        reqUrl: Full URL to POST to.
        postPayload: JSON-serialisable object sent as the request body.

    Returns:
        The response body text.
    """
    credentialsObj = ServiceAccountCredentials.from_json_keyfile_name(
        service_account_json,
        scopes=["https://www.googleapis.com/auth/cloud-platform"],
    )
    tokenInfo = credentialsObj.get_access_token()
    requestHeaders = {"authorization": "Bearer %s" % tokenInfo.access_token}

    reply = requests.post(
        reqUrl,
        data=json.dumps(postPayload),
        headers=requestHeaders,
    )
    return reply.text
88 |
89 |
def recolorBlue(**kwargs):
    """Return a random CSS ``hsl()`` colour string in a blue hue range.

    Any keyword arguments are accepted and ignored. Hue is drawn from
    210-230, saturation from 60-90 % and lightness from 35-45 %, in that
    order.
    """
    hue = random.randint(210, 230)
    saturation = random.randint(60, 90)
    lightness = random.randint(35, 45)
    return "hsl(%d, %d%%, %d%%)" % (hue, saturation, lightness)
95 |
96 |
def getStopwords():
    """Load the project's extra stop words from the bundled JSON file.

    The file is read from ``dirPath`` and is expected to hold a list of
    objects; the ``"word"`` value of each one is collected.

    Returns:
        A list with one stop word per entry in the file.
    """
    stopwordsPath = os.path.join(dirPath, "stopwords-20180109-133115.json")
    with open(stopwordsPath) as stopwordFile:
        entryList = json.loads(stopwordFile.read())
    return [entry["word"] for entry in entryList]
107 |
108 |
def generateStr(globalId, orgIdentifier, prodTranscript):
    """Download a meeting's raw transcript text and strip its stop words.

    Args:
        globalId: Meeting identifier, used in the file name and cloud path.
        orgIdentifier: Account identifier, used in the cloud path.
        prodTranscript: Transcript identifier, used in the file name.

    Returns:
        A five-item list: the remaining words joined by single spaces, the
        transcript file name, the total word count, the count of remaining
        words, and the ten most common remaining words with their counts.
    """
    # NLTK's English stop words; the project's own list is merged in below.
    ignoredWords = set(stopwords.words("english"))

    fileName = "rawTxt-" + str(globalId) + "-" + prodTranscript + "-list.txt"
    cloudPath = "accounts/" + orgIdentifier + "/enrichments/" + str(globalId) + "/transcripts/" + fileName
    bucketObj = storage.Client.from_service_account_json(service_account_path).get_bucket(bucketName)
    rawText = bucketObj.blob(cloudPath).download_as_string()

    # Flatten the text onto one line before splitting it into words.
    rawText = rawText.strip().replace("\n", " ")

    ignoredWords.update(getStopwords())

    allWords = rawText.lower().split()
    significantWords = [word for word in allWords if word not in ignoredWords]
    wordCounts = Counter(significantWords)

    return [
        " ".join(significantWords),
        fileName,
        len(allWords),
        len(significantWords),
        wordCounts.most_common(10),
    ]
136 |
137 |
def generateWordcloud(wordStr, outputFile):
    """Render a word cloud image for ``wordStr`` and save it to ``outputFile``.

    The cloud is generated at 1200x852 with the bundled LilitaOne font and
    the colours of the live theme, then passed through ``imresize`` with a
    size of [382, 538] before being written with ``imsave``.

    Args:
        wordStr: Space-separated words to draw.
        outputFile: Path of the image file to write.
    """
    # NOTE(review): scipy.misc.imresize / imsave are deprecated since
    # SciPy 1.0 and absent from later releases -- confirm the pinned version.
    themeList = {
        "theme01": {
            "bgColor": "#341c01",
            "colorList": ["#fffff0", "#d0aa3a", "#cea92e", "#c1762e", "#aea764", "#d59733", "#e9e3cd"],
        },
        "theme02": {
            "bgColor": "#fff",
            "colorList": ["#03318c", "#021f59", "#61a2ca", "#30588c", "#32628c"],
        },
        "theme03": {
            "bgColor": "#223564",
            "colorList": ["#f7e4be", "#f0f4bc", "#9a80a4", "#848da6"],
        },
        "theme04": {
            "bgColor": "#091c2b",
            "colorList": ["#edecf2", "#c1d4f2", "#6d98ba", "#3669a2", "#8793dd"],
        },
        "theme05": {
            "bgColor": "#000",
            "colorList": ["#b95c28", "#638db2", "#f0f0f0", "#dbcc58", "#1b3c69", "#d5a753"],
        },
        "theme06": {
            "bgColor": "#262626",
            "colorList": ["#468966", "#fff0a5", "#ffb03b", "#b64926", "#8e2800"],
        },
        "theme07": {
            "bgColor": "#fff",
            "colorList": ["#438D9C", "#E8A664", "#9C6043", "#171717", "#c00000"],
        },
    }

    # Only this theme is used; the others are kept as alternatives.
    liveTheme = "theme07"
    activeTheme = themeList[liveTheme]

    colorMap = LinearSegmentedColormap.from_list("mycmap", activeTheme["colorList"])
    fontPath = os.path.join(dirPath, "fonts/LilitaOne-Regular.ttf")

    cloudSettings = dict(
        font_path=fontPath,
        mode="RGBA",
        width=1200,
        height=852,
        margin=16,
        random_state=0,
        background_color=activeTheme["bgColor"],
        normalize_plurals=True,
        colormap=colorMap,
    )
    wordcloudObj = WordCloud(**cloudSettings).generate(wordStr)

    imsave(outputFile, imresize(wordcloudObj, [382, 538]))
209 |
210 |
def gcsUpload(globalId, orgIdentifier, fileName, filePath):
    """Upload a local file to the meeting's "wordclouds" folder and publish it.

    Args:
        globalId: Meeting identifier, used in the cloud path.
        orgIdentifier: Account identifier, used in the cloud path.
        fileName: Object name, also used in the content-disposition header.
        filePath: Local path of the file to upload.

    Returns:
        The public URL of the uploaded object.
    """
    cloudPath = "/".join(
        ["accounts", orgIdentifier, "enrichments", str(globalId), "wordclouds", fileName]
    )
    storageClient = storage.Client.from_service_account_json(service_account_path)
    targetBlob = storageClient.get_bucket(bucketName).blob(cloudPath)

    # Set before uploading, so the header is part of the upload.
    targetBlob.content_disposition = "inline; filename='%s'" % fileName

    targetBlob.upload_from_filename(filePath)
    targetBlob.make_public()
    return targetBlob.public_url
224 |
225 |
def assignUrl(globalId, wcUrl):
    """Report a meeting's word cloud URL to the utility service.

    Issues a GET against /idWordcloud with ``gId`` and ``wcUrl`` as query
    parameters.

    Args:
        globalId: Meeting identifier.
        wcUrl: URL of the uploaded word cloud image.

    Returns:
        The body text of the utility service's response.
    """
    queryArgs = {"gId": globalId, "wcUrl": wcUrl}
    reply = requests.get(utility_service_url + "/idWordcloud", params=queryArgs)
    return reply.text
236 |
237 |
def lookupMeeting(globalId):
    """Fetch a meeting's details from the utility service.

    Args:
        globalId: Meeting identifier sent as ``gId``.

    Returns:
        A ``(prodTranscript, orgIdentifier)`` tuple read from the JSON
        response of /meetingDetails.
    """
    reply = requests.get(
        utility_service_url + "/meetingDetails",
        params={"gId": globalId},
    )
    detailObj = json.loads(reply.text)
    return detailObj["prodTranscript"], detailObj["orgIdentifier"]
248 |
249 |
def dispatchWorker(ackId, globalId):
    """Build, upload and register the word cloud for one queued meeting.

    The Pub/Sub message is acknowledged on every path, including failures,
    so a broken meeting is not redelivered forever.

    Args:
        ackId: Pub/Sub acknowledgement id of the message being processed.
        globalId: Meeting identifier taken from the message attributes.

    Returns:
        True when the word cloud was created and its URL stored; False when
        the transcript held no words or any step failed.
    """
    successFlag = None
    try:
        print(".. creating word cloud for meeting: " + str(globalId))
        prodTranscript, orgIdentifier = lookupMeeting(globalId)
        print("... word cloud will be made from transcript: " + str(prodTranscript))

        epochTime = calendar.timegm(time.gmtime())
        outputFile = str(epochTime) + "-" + str(globalId) + "-wordcloud-538-by-382.png"
        filePath = os.path.join(dirPath, outputFile)

        wordStr, sourceFile, totalCnt, significantCnt, commonWords = generateStr(
            globalId, orgIdentifier, prodTranscript
        )
        print("... pulling words from file: " + sourceFile)
        print(".... " + str(totalCnt) + " words identified")
        if totalCnt > 0:
            print(".... " + str(significantCnt) + " significant words identified")
            print(".... ten most common words: ")
            for eachEntry in commonWords:
                print("..... " + str(eachEntry))
            try:
                generateWordcloud(wordStr, filePath)
                wcUrl = gcsUpload(globalId, orgIdentifier, outputFile, filePath)
            finally:
                # Drop the temporary image even when rendering or the upload
                # fails, so failed runs do not pile up files on disk.
                if os.path.exists(filePath):
                    os.remove(filePath)
            print("... word cloud URL: " + wcUrl)
            print("... URL assigned in database: " + str(assignUrl(globalId, wcUrl)))
            successFlag = True
        else:
            print(".... stopping now because no words were identified")
            successFlag = False
        print(acknowledgeMsg(ackId))
    except Exception as e:
        print(acknowledgeMsg(ackId))
        # repr() always yields a string; e.message is deprecated and may be
        # empty or a non-string, which made this handler itself raise.
        print("skip " + repr(e))
        successFlag = False

    return successFlag
287 |
def acknowledgeMsg(ackId):
    """Acknowledge one Pub/Sub message on this worker's subscription.

    Args:
        ackId: Acknowledgement id received with the pulled message.

    Returns:
        A fixed status string; the service's response is not inspected.
    """
    ackUrl = "https://pubsub.googleapis.com/v1/projects/%s/subscriptions/%s:acknowledge" % (
        projectId,
        subName,
    )
    psCall(ackUrl, {"ackIds": [ackId]})
    return "... Pubsub message acknowledged"
297 |
298 |
def nextAction(globalId):
    """Ask the utility service to publish the follow-up "index" message.

    Issues a GET against /msgPublish targeting the "indexQueue" topic.

    Args:
        globalId: Meeting identifier forwarded as ``gId``.

    Returns:
        The body text of the utility service's response.
    """
    queryArgs = {
        "msgAction": "index",
        "topicName": "indexQueue",
        "gId": globalId,
    }
    return requests.get(utility_service_url + "/msgPublish", params=queryArgs).text
313 |
314 |
def _parsePullResponse(psMsg):
    """Pick the first message out of a Pub/Sub ``:pull`` response body.

    Args:
        psMsg: Response body text returned by the pull request.

    Returns:
        A ``(msgType, ackId, globalId)`` tuple, or ``None`` when the
        response carries no messages.

    Raises:
        RuntimeError: The response is an API error object. Raising lets the
            caller report it instead of mistaking it for an empty queue.
    """
    responseObj = json.loads(psMsg)
    if "error" in responseObj:
        raise RuntimeError("Pub/Sub pull failed: " + json.dumps(responseObj["error"]))
    receivedList = responseObj.get("receivedMessages")
    if not receivedList:
        return None
    firstEntry = receivedList[0]
    messageObj = firstEntry["message"]
    msgType = base64.b64decode(messageObj["data"])
    return msgType, firstEntry["ackId"], messageObj["attributes"]["globalId"]


def main():
    """Poll the subscription forever, handling one message per iteration.

    Each iteration pulls at most one message and hands it to dispatchWorker;
    the follow-up action is published only when that reports success. A
    failure in an iteration is printed and the loop carries on after the
    usual pause; an empty queue is not treated as an error.
    KeyboardInterrupt and SystemExit are not intercepted, so the worker can
    still be stopped.
    """
    postPayload = {
        "returnImmediately": True,
        "maxMessages": 1
    }
    subStr = "projects/%s/subscriptions/%s" % (projectId, subName)
    reqUrl = "https://pubsub.googleapis.com/v1/%s:pull" % subStr

    while True:
        try:
            parsedMsg = _parsePullResponse(psCall(reqUrl, postPayload))
            if parsedMsg is not None:
                msgType, ackId, globalId = parsedMsg
                print("Message Received. Type = '%s'" % str(msgType))
                print(ackId)
                print(globalId)
                if dispatchWorker(ackId, globalId):
                    print(nextAction(globalId))
                else:
                    print(".. not initiating next action")
        except Exception as e:
            # Keep the worker alive, but make the failure visible.
            print("... worker iteration failed: " + repr(e))
        time.sleep(1)
345 |
346 |
# Start the polling loop only when this file is executed as a script.
if __name__ == "__main__":
    main()
349 |
--------------------------------------------------------------------------------
/publish-pdf-transcript/worker.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # This is not an officially supported Google product, though support
4 | # will be provided on a best-effort basis.
5 |
6 | # Copyright 2018 Google LLC
7 |
8 | # Licensed under the Apache License, Version 2.0 (the "License"); you
9 | # may not use this file except in compliance with the License.
10 |
11 | # You may obtain a copy of the License at:
12 |
13 | # https://www.apache.org/licenses/LICENSE-2.0
14 |
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 |
21 |
22 | import os
23 | import json
24 | import time
25 | import pdfkit
26 | import base64
27 | import requests
28 | from datetime import datetime
29 | from google.cloud import pubsub
30 | from google.cloud import storage
31 | from oauth2client.service_account import ServiceAccountCredentials
32 |
# Service-account key file, resolved against the directory the worker is
# started from. The "__...__" values are placeholders (presumably filled in
# at deployment time).
service_account_json = "__Credential_JSON_File_Name__"
dirPath = os.path.normpath(os.getcwd())
service_account_path = os.path.join(dirPath, service_account_json)

# Pub/Sub project, topic and subscription this worker listens on.
projectId = "__GCP_Project_ID__"
topicName = "publish-transcript-queue"
subName = "publish-transcript-subscription"

# Cloud Storage bucket and base URL of the utility service.
bucketName = "__GCS_Storage_Bucket_Name__"
utility_service_url = "__Utility_Service_URL__"

# Subscriber client plus the fully qualified topic / subscription paths.
psClient = pubsub.SubscriberClient()
topicPath = psClient.topic_path(projectId, topicName)
subPath = psClient.subscription_path(projectId, subName)
subObj = psClient.subscribe(subPath)
60 |
61 |
def mkPdf(inputFile):
    """Convert an HTML file to a Letter-size PDF with pdfkit.

    The output path is the input path with every ".html" replaced by
    ".pdf". Each page gets a centred "page N/M" footer.

    Args:
        inputFile: Path of the HTML file to convert.

    Returns:
        The path of the PDF that was written.
    """
    pdfPath = inputFile.replace(".html", ".pdf")
    renderOptions = {
        # Page geometry.
        "page-size": "Letter",
        "margin-top": "0.75in",
        "margin-right": "0.75in",
        "margin-bottom": "1.00in",
        "margin-left": "0.75in",
        # Footer.
        "footer-center": "page [page]/[topage]",
        "footer-font-name": "Roboto",
        "footer-font-size": "8",
        "footer-spacing": "10",
    }
    pdfkit.from_file(inputFile, pdfPath, options=renderOptions)
    return pdfPath
81 |
82 |
def lookupMeeting(globalId):
    """Fetch a meeting's details from the utility service.

    Args:
        globalId: Meeting identifier sent as ``gId``.

    Returns:
        A ``(prodTranscript, meetingDate, orgIdentifier)`` tuple read from
        the JSON response of /meetingDetails.
    """
    reply = requests.get(
        utility_service_url + "/meetingDetails",
        params={"gId": globalId},
    )
    detailObj = json.loads(reply.text)
    return (
        detailObj["prodTranscript"],
        detailObj["meetingDate"],
        detailObj["orgIdentifier"],
    )
93 |
94 |
def formatDate(meetingDate):
    """Turn a ``YYYYMMDD`` string into a long date and a weekday name.

    Args:
        meetingDate: Date string in ``%Y%m%d`` form, e.g. "20180109".

    Returns:
        A ``(formattedDate, weekDay)`` tuple, e.g.
        ("January 09, 2018", "Tuesday").

    Raises:
        ValueError: ``meetingDate`` does not match ``%Y%m%d``.
    """
    parsedDate = datetime.strptime(meetingDate, "%Y%m%d")
    return parsedDate.strftime("%B %d, %Y"), parsedDate.strftime("%A")
101 |
102 |
def meetingDetails(globalId):
    """Fetch a meeting's description and display-ready date.

    Args:
        globalId: Meeting identifier sent as ``gId``.

    Returns:
        A ``(meetingDesc, formattedDate, weekDay)`` tuple; the two date
        values come from passing the response's ``meetingDate`` through
        formatDate.
    """
    reply = requests.get(
        utility_service_url + "/meetingDetails",
        params={"gId": globalId},
    )
    detailObj = json.loads(reply.text)

    formattedDate, weekDay = formatDate(detailObj["meetingDate"])
    return detailObj["meetingDesc"], formattedDate, weekDay
115 |
116 |
def gcsUpload(globalId, orgIdentifier, fileName, filePath):
    """Upload a local file to the meeting's "transcripts" folder and publish it.

    Args:
        globalId: Meeting identifier, used in the cloud path.
        orgIdentifier: Account identifier, used in the cloud path.
        fileName: Object name, also used in the content-disposition header.
        filePath: Local path of the file to upload.

    Returns:
        The public URL of the uploaded object.
    """
    cloudPath = "/".join(
        ["accounts", orgIdentifier, "enrichments", str(globalId), "transcripts", fileName]
    )
    storageClient = storage.Client.from_service_account_json(service_account_path)
    targetBlob = storageClient.get_bucket(bucketName).blob(cloudPath)

    # Set before uploading, so the header is part of the upload.
    targetBlob.content_disposition = "inline; filename='%s'" % fileName

    targetBlob.upload_from_filename(filePath)
    targetBlob.make_public()
    return targetBlob.public_url
130 |
131 |
def assignUrl(globalId, transcriptUrl):
    """Report a meeting's published transcript URL to the utility service.

    Issues a GET against /idTranscript with ``gId`` and ``transcriptUrl`` as
    query parameters.

    Args:
        globalId: Meeting identifier.
        transcriptUrl: URL of the uploaded transcript.

    Returns:
        The body text of the utility service's response.
    """
    queryArgs = {"gId": globalId, "transcriptUrl": transcriptUrl}
    reply = requests.get(utility_service_url + "/idTranscript", params=queryArgs)
    return reply.text
142 |
143 |
144 | def runCycle(globalId, orgIdentifier, prodTranscript):
145 | basePath = "accounts/" + orgIdentifier + "/enrichments/" + str(globalId) + "/transcripts/"
146 | cloudPath = basePath + str(prodTranscript) + "/"
147 | clientObj = storage.Client.from_service_account_json(service_account_path)
148 | bucketObj = clientObj.get_bucket(bucketName)
149 | listObj = bucketObj.list_blobs(prefix=cloudPath)
150 | print "!!! length of listObj: " + str(listObj)
151 | transcriptList = []
152 | for eachEntry in listObj:
153 | if ".json" in eachEntry.name:
154 | transcriptList.append(str(eachEntry.name))
155 |
156 | fileCnt = 0
157 |
158 | htmlStr = ""
159 | for eachFile in sorted(transcriptList, reverse=False):
160 | fileCnt += 1
161 | blobObj = bucketObj.get_blob(eachFile)
162 | blobStr = blobObj.download_as_string()
163 | jsonObj = json.loads(blobStr)
164 |
165 | if "response" in jsonObj:
166 | if "results" in jsonObj["response"]:
167 | for eachAlt in jsonObj["response"]["results"]:
168 | tmpStr = ""
169 | timeVal = None
170 | if "alternatives" in eachAlt:
171 | timeVal = eachAlt["alternatives"][0]["words"][0]["startTime"]
172 | timeVal = timeVal.replace("s", "")
173 | timeVal = float(timeVal)
174 | if fileCnt > 1:
175 | timeVal = timeVal + ((fileCnt - 1) * 10800)
176 | displayTime = time.strftime("%H:%M:%S", time.gmtime(timeVal))
177 | transcriptStr = eachAlt["alternatives"][0]["transcript"]
178 | transcriptStr = transcriptStr.strip()
179 | transcriptStr = transcriptStr.replace("Lewisville", "Louisville")
180 | transcriptStr = transcriptStr.replace("Pro stack", "PROSTAC")
181 | transcriptStr = transcriptStr.replace("pro stack", "PROSTAC")
182 | transcriptStr = transcriptStr.replace("Pro Stacks", "PROSTAC")
183 | transcriptStr = transcriptStr.replace("pro Strat", "PROSTAC")
184 | transcriptStr = transcriptStr.replace("pro-sex", "PROSTAC")
185 | htmlStr += "%s
""" % transcriptStr 187 | 188 | #newPath = basePath + "rawTxt-" + str(globalId) + "-" + exportType + "-" + str(prodTranscript) + ".txt" 189 | ##newPath = basePath + fileName 190 | ##blobObj = bucketObj.blob(newPath) 191 | ##blobObj.upload_from_string(masterStr.strip()) 192 | 193 | return htmlStr 194 | 195 | 196 | def mkTranscript(globalId, municipality_display_name, municipality_short_name): 197 | prodTranscript, rawDate, orgIdentifier = lookupMeeting(globalId) 198 | meetingDesc, meetingDate, weekDay = meetingDetails(globalId) 199 | 200 | 201 | htmlStr = """ 202 | 203 | 204 | 240 | 241 | 242 | """ 243 | 244 | htmlStr += "