├── scientific-skills
├── document-skills
│ ├── docx
│ │ ├── scripts
│ │ │ ├── __init__.py
│ │ │ └── templates
│ │ │ │ ├── people.xml
│ │ │ │ ├── comments.xml
│ │ │ │ ├── commentsExtended.xml
│ │ │ │ ├── commentsIds.xml
│ │ │ │ └── commentsExtensible.xml
│ │ ├── ooxml
│ │ │ ├── scripts
│ │ │ │ ├── validation
│ │ │ │ │ └── __init__.py
│ │ │ │ ├── unpack.py
│ │ │ │ └── validate.py
│ │ │ └── schemas
│ │ │ │ ├── ISO-IEC29500-4_2016
│ │ │ │ ├── vml-presentationDrawing.xsd
│ │ │ │ ├── dml-lockedCanvas.xsd
│ │ │ │ ├── shared-customXmlSchemaProperties.xsd
│ │ │ │ ├── dml-picture.xsd
│ │ │ │ ├── shared-customXmlDataProperties.xsd
│ │ │ │ ├── shared-additionalCharacteristics.xsd
│ │ │ │ ├── shared-relationshipReference.xsd
│ │ │ │ ├── shared-documentPropertiesCustom.xsd
│ │ │ │ ├── shared-documentPropertiesExtended.xsd
│ │ │ │ └── vml-wordprocessingDrawing.xsd
│ │ │ │ ├── microsoft
│ │ │ │ ├── wml-sdtdatahash-2020.xsd
│ │ │ │ ├── wml-symex-2015.xsd
│ │ │ │ ├── wml-2018.xsd
│ │ │ │ ├── wml-cid-2016.xsd
│ │ │ │ ├── wml-cex-2018.xsd
│ │ │ │ └── wml-2012.xsd
│ │ │ │ ├── ecma
│ │ │ │ └── fouth-edition
│ │ │ │ │ ├── opc-relationships.xsd
│ │ │ │ │ ├── opc-contentTypes.xsd
│ │ │ │ │ ├── opc-coreProperties.xsd
│ │ │ │ │ └── opc-digSig.xsd
│ │ │ │ └── mce
│ │ │ │ └── mc.xsd
│ │ └── LICENSE.txt
│ ├── pptx
│ │ ├── ooxml
│ │ │ ├── scripts
│ │ │ │ ├── validation
│ │ │ │ │ └── __init__.py
│ │ │ │ ├── unpack.py
│ │ │ │ └── validate.py
│ │ │ └── schemas
│ │ │ │ ├── ISO-IEC29500-4_2016
│ │ │ │ ├── vml-presentationDrawing.xsd
│ │ │ │ ├── dml-lockedCanvas.xsd
│ │ │ │ ├── shared-customXmlSchemaProperties.xsd
│ │ │ │ ├── dml-picture.xsd
│ │ │ │ ├── shared-customXmlDataProperties.xsd
│ │ │ │ ├── shared-additionalCharacteristics.xsd
│ │ │ │ ├── shared-relationshipReference.xsd
│ │ │ │ ├── shared-documentPropertiesCustom.xsd
│ │ │ │ └── shared-documentPropertiesExtended.xsd
│ │ │ │ ├── microsoft
│ │ │ │ ├── wml-sdtdatahash-2020.xsd
│ │ │ │ ├── wml-symex-2015.xsd
│ │ │ │ ├── wml-2018.xsd
│ │ │ │ ├── wml-cid-2016.xsd
│ │ │ │ ├── wml-cex-2018.xsd
│ │ │ │ └── wml-2012.xsd
│ │ │ │ ├── ecma
│ │ │ │ └── fouth-edition
│ │ │ │ │ ├── opc-relationships.xsd
│ │ │ │ │ ├── opc-contentTypes.xsd
│ │ │ │ │ ├── opc-coreProperties.xsd
│ │ │ │ │ └── opc-digSig.xsd
│ │ │ │ └── mce
│ │ │ │ └── mc.xsd
│ │ └── LICENSE.txt
│ ├── pdf
│ │ ├── scripts
│ │ │ ├── check_fillable_fields.py
│ │ │ ├── convert_pdf_to_images.py
│ │ │ ├── create_validation_image.py
│ │ │ ├── check_bounding_boxes.py
│ │ │ └── fill_pdf_form_with_annotations.py
│ │ └── LICENSE.txt
│ └── xlsx
│ │ └── LICENSE.txt
├── diffdock
│ └── assets
│ │ ├── batch_template.csv
│ │ └── custom_inference_config.yaml
├── perplexity-search
│ └── assets
│ │ └── .env.example
├── modal
│ └── references
│ │ ├── api_reference.md
│ │ ├── getting-started.md
│ │ └── resources.md
├── neuropixels-analysis
│ ├── LICENSE.txt
│ └── scripts
│ │ ├── export_to_phy.py
│ │ └── run_sorting.py
├── markitdown
│ └── LICENSE.txt
├── fluidsim
│ └── references
│ │ ├── installation.md
│ │ └── solvers.md
├── scientific-visualization
│ └── assets
│ │ ├── presentation.mplstyle
│ │ ├── nature.mplstyle
│ │ └── publication.mplstyle
├── deeptools
│ ├── assets
│ │ └── quick_reference.md
│ └── references
│ │ └── effective_genome_sizes.md
├── opentrons-integration
│ └── scripts
│ │ ├── basic_protocol_template.py
│ │ └── serial_dilution_template.py
├── pymoo
│ └── scripts
│ │ ├── single_objective_example.py
│ │ ├── multi_objective_example.py
│ │ └── many_objective_example.py
├── geopandas
│ └── references
│ │ └── data-structures.md
├── clinical-reports
│ └── scripts
│ │ ├── compliance_checker.py
│ │ ├── extract_clinical_data.py
│ │ ├── validate_trial_report.py
│ │ └── format_adverse_events.py
├── qiskit
│ └── references
│ │ └── setup.md
├── research-lookup
│ └── lookup.py
├── gtars
│ └── references
│ │ ├── tokenizers.md
│ │ └── refget.md
├── geniml
│ └── references
│ │ └── region2vec.md
├── arboreto
│ └── scripts
│ │ └── basic_grn_inference.py
├── scientific-schematics
│ └── example_usage.sh
├── protocolsio-integration
│ └── references
│ │ └── authentication.md
├── adaptyv
│ └── SKILL.md
├── paper-2-web
│ └── references
│ │ └── installation.md
└── aeon
│ └── references
│ └── clustering.md
├── .gitignore
├── LICENSE.md
└── .github
└── workflows
└── release.yml
/scientific-skills/document-skills/docx/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | # Make scripts directory a package for relative imports in tests
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .claude
2 | .DS_Store
3 |
4 | temp/
5 |
6 | pyproject.toml
7 | uv.lock
8 |
9 | .venv/
10 | .python-version
11 | main.py
12 |
13 | __pycache__/
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/scripts/templates/people.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/scientific-skills/diffdock/assets/batch_template.csv:
--------------------------------------------------------------------------------
1 | complex_name,protein_path,ligand_description,protein_sequence
2 | example_1,protein1.pdb,CC(=O)Oc1ccccc1C(=O)O,
3 | example_2,,COc1ccc(C#N)cc1,MSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKNGIKVNFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK
4 | example_3,protein3.pdb,ligand3.sdf,
5 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/scripts/validation/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Validation modules for Word document processing.
3 | """
4 |
5 | from .base import BaseSchemaValidator
6 | from .docx import DOCXSchemaValidator
7 | from .pptx import PPTXSchemaValidator
8 | from .redlining import RedliningValidator
9 |
10 | __all__ = [
11 | "BaseSchemaValidator",
12 | "DOCXSchemaValidator",
13 | "PPTXSchemaValidator",
14 | "RedliningValidator",
15 | ]
16 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/scripts/validation/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Validation modules for PowerPoint document processing.
3 | """
4 |
5 | from .base import BaseSchemaValidator
6 | from .docx import DOCXSchemaValidator
7 | from .pptx import PPTXSchemaValidator
8 | from .redlining import RedliningValidator
9 |
10 | __all__ = [
11 | "BaseSchemaValidator",
12 | "DOCXSchemaValidator",
13 | "PPTXSchemaValidator",
14 | "RedliningValidator",
15 | ]
16 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pdf/scripts/check_fillable_fields.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from pypdf import PdfReader
3 |
4 |
5 | # Script for Claude to run to determine whether a PDF has fillable form fields. See forms.md.
6 |
7 |
8 | reader = PdfReader(sys.argv[1])
9 | if (reader.get_fields()):
10 | print("This PDF has fillable form fields")
11 | else:
12 | print("This PDF does not have fillable form fields; you will need to visually determine where to enter data")
13 |
--------------------------------------------------------------------------------
/scientific-skills/perplexity-search/assets/.env.example:
--------------------------------------------------------------------------------
1 | # Perplexity Search Environment Configuration
2 | # Copy this file to .env and fill in your actual API key
3 |
4 | # OpenRouter API Key
5 | # Get your key from: https://openrouter.ai/keys
6 | OPENROUTER_API_KEY=sk-or-v1-your-api-key-here
7 |
8 | # Optional: Default model to use
9 | # Options: sonar-pro, sonar-pro-search, sonar, sonar-reasoning-pro, sonar-reasoning
10 | # DEFAULT_MODEL=sonar-pro
11 |
12 | # Optional: Default max tokens for responses
13 | # DEFAULT_MAX_TOKENS=4000
14 |
15 | # Optional: Default temperature (0.0 - 1.0)
16 | # DEFAULT_TEMPERATURE=0.2
17 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd:
--------------------------------------------------------------------------------
1 |
2 |
8 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd:
--------------------------------------------------------------------------------
1 |
2 |
8 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/microsoft/wml-symex-2015.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/microsoft/wml-symex-2015.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/microsoft/wml-2018.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2018.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/microsoft/wml-cid-2016.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/microsoft/wml-cid-2016.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/scientific-skills/modal/references/api_reference.md:
--------------------------------------------------------------------------------
1 | # Reference Documentation for Modal
2 |
3 | This is a placeholder for detailed reference documentation.
4 | Replace with actual reference content or delete if not needed.
5 |
6 | Example real reference docs from other skills:
7 | - product-management/references/communication.md - Comprehensive guide for status updates
8 | - product-management/references/context_building.md - Deep-dive on gathering context
9 | - bigquery/references/ - API references and query examples
10 |
11 | ## When Reference Docs Are Useful
12 |
13 | Reference docs are ideal for:
14 | - Comprehensive API documentation
15 | - Detailed workflow guides
16 | - Complex multi-step processes
17 | - Information too lengthy for main SKILL.md
18 | - Content that's only needed for specific use cases
19 |
20 | ## Structure Suggestions
21 |
22 | ### API Reference Example
23 | - Overview
24 | - Authentication
25 | - Endpoints with examples
26 | - Error codes
27 | - Rate limits
28 |
29 | ### Workflow Guide Example
30 | - Prerequisites
31 | - Step-by-step instructions
32 | - Common patterns
33 | - Troubleshooting
34 | - Best practices
35 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 K-Dense Inc.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/scientific-skills/neuropixels-analysis/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 Shen Lab
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/scientific-skills/markitdown/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) Microsoft Corporation.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/scripts/unpack.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """Unpack and format XML contents of Office files (.docx, .pptx, .xlsx)"""
3 |
4 | import random
5 | import sys
6 | import defusedxml.minidom
7 | import zipfile
8 | from pathlib import Path
9 |
10 | # Get command line arguments
11 | assert len(sys.argv) == 3, "Usage: python unpack.py "
12 | input_file, output_dir = sys.argv[1], sys.argv[2]
13 |
14 | # Extract and format
15 | output_path = Path(output_dir)
16 | output_path.mkdir(parents=True, exist_ok=True)
17 | zipfile.ZipFile(input_file).extractall(output_path)
18 |
19 | # Pretty print all XML files
20 | xml_files = list(output_path.rglob("*.xml")) + list(output_path.rglob("*.rels"))
21 | for xml_file in xml_files:
22 | content = xml_file.read_text(encoding="utf-8")
23 | dom = defusedxml.minidom.parseString(content)
24 | xml_file.write_bytes(dom.toprettyxml(indent=" ", encoding="ascii"))
25 |
26 | # For .docx files, suggest an RSID for tracked changes
27 | if input_file.endswith(".docx"):
28 | suggested_rsid = "".join(random.choices("0123456789ABCDEF", k=8))
29 | print(f"Suggested RSID for edit session: {suggested_rsid}")
30 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/scripts/unpack.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """Unpack and format XML contents of Office files (.docx, .pptx, .xlsx)"""
3 |
4 | import random
5 | import sys
6 | import defusedxml.minidom
7 | import zipfile
8 | from pathlib import Path
9 |
10 | # Get command line arguments
11 | assert len(sys.argv) == 3, "Usage: python unpack.py "
12 | input_file, output_dir = sys.argv[1], sys.argv[2]
13 |
14 | # Extract and format
15 | output_path = Path(output_dir)
16 | output_path.mkdir(parents=True, exist_ok=True)
17 | zipfile.ZipFile(input_file).extractall(output_path)
18 |
19 | # Pretty print all XML files
20 | xml_files = list(output_path.rglob("*.xml")) + list(output_path.rglob("*.rels"))
21 | for xml_file in xml_files:
22 | content = xml_file.read_text(encoding="utf-8")
23 | dom = defusedxml.minidom.parseString(content)
24 | xml_file.write_bytes(dom.toprettyxml(indent=" ", encoding="ascii"))
25 |
26 | # For .docx files, suggest an RSID for tracked changes
27 | if input_file.endswith(".docx"):
28 | suggested_rsid = "".join(random.choices("0123456789ABCDEF", k=8))
29 | print(f"Suggested RSID for edit session: {suggested_rsid}")
30 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pdf/scripts/convert_pdf_to_images.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | from pdf2image import convert_from_path
5 |
6 |
7 | # Converts each page of a PDF to a PNG image.
8 |
9 |
10 | def convert(pdf_path, output_dir, max_dim=1000):
11 | images = convert_from_path(pdf_path, dpi=200)
12 |
13 | for i, image in enumerate(images):
14 | # Scale image if needed to keep width/height under `max_dim`
15 | width, height = image.size
16 | if width > max_dim or height > max_dim:
17 | scale_factor = min(max_dim / width, max_dim / height)
18 | new_width = int(width * scale_factor)
19 | new_height = int(height * scale_factor)
20 | image = image.resize((new_width, new_height))
21 |
22 | image_path = os.path.join(output_dir, f"page_{i+1}.png")
23 | image.save(image_path)
24 | print(f"Saved page {i+1} as {image_path} (size: {image.size})")
25 |
26 | print(f"Converted {len(images)} pages to PNG images")
27 |
28 |
29 | if __name__ == "__main__":
30 | if len(sys.argv) != 3:
31 | print("Usage: convert_pdf_to_images.py [input pdf] [output directory]")
32 | sys.exit(1)
33 | pdf_path = sys.argv[1]
34 | output_directory = sys.argv[2]
35 | convert(pdf_path, output_directory)
36 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
8 |
9 |
10 |
11 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
8 |
9 |
10 |
11 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd:
--------------------------------------------------------------------------------
1 |
2 |
7 |
9 |
10 |
11 |
12 |
13 |
14 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd:
--------------------------------------------------------------------------------
1 |
2 |
7 |
9 |
10 |
11 |
12 |
13 |
14 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd:
--------------------------------------------------------------------------------
1 |
2 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd:
--------------------------------------------------------------------------------
1 |
2 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/scientific-skills/fluidsim/references/installation.md:
--------------------------------------------------------------------------------
1 | # FluidSim Installation
2 |
3 | ## Requirements
4 |
5 | - Python >= 3.9
6 | - Virtual environment recommended
7 |
8 | ## Installation Methods
9 |
10 | ### Basic Installation
11 |
12 | Install fluidsim using uv:
13 |
14 | ```bash
15 | uv pip install fluidsim
16 | ```
17 |
18 | ### With FFT Support (Required for Pseudospectral Solvers)
19 |
20 | Most fluidsim solvers use Fourier-based methods and require FFT libraries:
21 |
22 | ```bash
23 | uv pip install "fluidsim[fft]"
24 | ```
25 |
26 | This installs fluidfft and pyfftw dependencies.
27 |
28 | ### With MPI and FFT (For Parallel Simulations)
29 |
30 | For high-performance parallel computing:
31 |
32 | ```bash
33 | uv pip install "fluidsim[fft,mpi]"
34 | ```
35 |
36 | Note: This triggers local compilation of mpi4py.
37 |
38 | ## Environment Configuration
39 |
40 | ### Output Directories
41 |
42 | Set environment variables to control where simulation data is stored:
43 |
44 | ```bash
45 | export FLUIDSIM_PATH=/path/to/simulation/outputs
46 | export FLUIDDYN_PATH_SCRATCH=/path/to/working/directory
47 | ```
48 |
49 | ### FFT Method Selection
50 |
51 | Specify FFT implementation (optional):
52 |
53 | ```bash
54 | export FLUIDSIM_TYPE_FFT2D=fft2d.with_fftw
55 | export FLUIDSIM_TYPE_FFT3D=fft3d.with_fftw
56 | ```
57 |
58 | ## Verification
59 |
60 | Test the installation:
61 |
62 | ```bash
63 | pytest --pyargs fluidsim
64 | ```
65 |
66 | ## No Authentication Required
67 |
68 | FluidSim does not require API keys or authentication tokens.
69 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/LICENSE.txt:
--------------------------------------------------------------------------------
1 | © 2025 Anthropic, PBC. All rights reserved.
2 |
3 | LICENSE: Use of these materials (including all code, prompts, assets, files,
4 | and other components of this Skill) is governed by your agreement with
5 | Anthropic regarding use of Anthropic's services. If no separate agreement
6 | exists, use is governed by Anthropic's Consumer Terms of Service or
7 | Commercial Terms of Service, as applicable:
8 | https://www.anthropic.com/legal/consumer-terms
9 | https://www.anthropic.com/legal/commercial-terms
10 | Your applicable agreement is referred to as the "Agreement." "Services" are
11 | as defined in the Agreement.
12 |
13 | ADDITIONAL RESTRICTIONS: Notwithstanding anything in the Agreement to the
14 | contrary, users may not:
15 |
16 | - Extract these materials from the Services or retain copies of these
17 | materials outside the Services
18 | - Reproduce or copy these materials, except for temporary copies created
19 | automatically during authorized use of the Services
20 | - Create derivative works based on these materials
21 | - Distribute, sublicense, or transfer these materials to any third party
22 | - Make, offer to sell, sell, or import any inventions embodied in these
23 | materials
24 | - Reverse engineer, decompile, or disassemble these materials
25 |
26 | The receipt, viewing, or possession of these materials does not convey or
27 | imply any license or right beyond those expressly granted above.
28 |
29 | Anthropic retains all right, title, and interest in these materials,
30 | including all copyrights, patents, and other intellectual property rights.
31 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pdf/LICENSE.txt:
--------------------------------------------------------------------------------
1 | © 2025 Anthropic, PBC. All rights reserved.
2 |
3 | LICENSE: Use of these materials (including all code, prompts, assets, files,
4 | and other components of this Skill) is governed by your agreement with
5 | Anthropic regarding use of Anthropic's services. If no separate agreement
6 | exists, use is governed by Anthropic's Consumer Terms of Service or
7 | Commercial Terms of Service, as applicable:
8 | https://www.anthropic.com/legal/consumer-terms
9 | https://www.anthropic.com/legal/commercial-terms
10 | Your applicable agreement is referred to as the "Agreement." "Services" are
11 | as defined in the Agreement.
12 |
13 | ADDITIONAL RESTRICTIONS: Notwithstanding anything in the Agreement to the
14 | contrary, users may not:
15 |
16 | - Extract these materials from the Services or retain copies of these
17 | materials outside the Services
18 | - Reproduce or copy these materials, except for temporary copies created
19 | automatically during authorized use of the Services
20 | - Create derivative works based on these materials
21 | - Distribute, sublicense, or transfer these materials to any third party
22 | - Make, offer to sell, sell, or import any inventions embodied in these
23 | materials
24 | - Reverse engineer, decompile, or disassemble these materials
25 |
26 | The receipt, viewing, or possession of these materials does not convey or
27 | imply any license or right beyond those expressly granted above.
28 |
29 | Anthropic retains all right, title, and interest in these materials,
30 | including all copyrights, patents, and other intellectual property rights.
31 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/LICENSE.txt:
--------------------------------------------------------------------------------
1 | © 2025 Anthropic, PBC. All rights reserved.
2 |
3 | LICENSE: Use of these materials (including all code, prompts, assets, files,
4 | and other components of this Skill) is governed by your agreement with
5 | Anthropic regarding use of Anthropic's services. If no separate agreement
6 | exists, use is governed by Anthropic's Consumer Terms of Service or
7 | Commercial Terms of Service, as applicable:
8 | https://www.anthropic.com/legal/consumer-terms
9 | https://www.anthropic.com/legal/commercial-terms
10 | Your applicable agreement is referred to as the "Agreement." "Services" are
11 | as defined in the Agreement.
12 |
13 | ADDITIONAL RESTRICTIONS: Notwithstanding anything in the Agreement to the
14 | contrary, users may not:
15 |
16 | - Extract these materials from the Services or retain copies of these
17 | materials outside the Services
18 | - Reproduce or copy these materials, except for temporary copies created
19 | automatically during authorized use of the Services
20 | - Create derivative works based on these materials
21 | - Distribute, sublicense, or transfer these materials to any third party
22 | - Make, offer to sell, sell, or import any inventions embodied in these
23 | materials
24 | - Reverse engineer, decompile, or disassemble these materials
25 |
26 | The receipt, viewing, or possession of these materials does not convey or
27 | imply any license or right beyond those expressly granted above.
28 |
29 | Anthropic retains all right, title, and interest in these materials,
30 | including all copyrights, patents, and other intellectual property rights.
31 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/xlsx/LICENSE.txt:
--------------------------------------------------------------------------------
1 | © 2025 Anthropic, PBC. All rights reserved.
2 |
3 | LICENSE: Use of these materials (including all code, prompts, assets, files,
4 | and other components of this Skill) is governed by your agreement with
5 | Anthropic regarding use of Anthropic's services. If no separate agreement
6 | exists, use is governed by Anthropic's Consumer Terms of Service or
7 | Commercial Terms of Service, as applicable:
8 | https://www.anthropic.com/legal/consumer-terms
9 | https://www.anthropic.com/legal/commercial-terms
10 | Your applicable agreement is referred to as the "Agreement." "Services" are
11 | as defined in the Agreement.
12 |
13 | ADDITIONAL RESTRICTIONS: Notwithstanding anything in the Agreement to the
14 | contrary, users may not:
15 |
16 | - Extract these materials from the Services or retain copies of these
17 | materials outside the Services
18 | - Reproduce or copy these materials, except for temporary copies created
19 | automatically during authorized use of the Services
20 | - Create derivative works based on these materials
21 | - Distribute, sublicense, or transfer these materials to any third party
22 | - Make, offer to sell, sell, or import any inventions embodied in these
23 | materials
24 | - Reverse engineer, decompile, or disassemble these materials
25 |
26 | The receipt, viewing, or possession of these materials does not convey or
27 | imply any license or right beyond those expressly granted above.
28 |
29 | Anthropic retains all right, title, and interest in these materials,
30 | including all copyrights, patents, and other intellectual property rights.
31 |
--------------------------------------------------------------------------------
/scientific-skills/scientific-visualization/assets/presentation.mplstyle:
--------------------------------------------------------------------------------
1 | # Presentation/Poster style
2 | # Usage: plt.style.use('presentation.mplstyle')
3 | #
4 | # Larger fonts and thicker lines for presentations,
5 | # posters, and projected displays
6 |
7 | # Figure properties
8 | figure.dpi: 100
9 | figure.facecolor: white
10 | figure.constrained_layout.use: True
11 | figure.figsize: 8, 6
12 |
13 | # Font properties (larger for visibility)
14 | font.size: 14
15 | font.family: sans-serif
16 | font.sans-serif: Arial, Helvetica, Calibri
17 |
18 | # Axes properties
19 | axes.linewidth: 1.5
20 | axes.labelsize: 16
21 | axes.titlesize: 18
22 | axes.labelweight: normal
23 | axes.spines.top: False
24 | axes.spines.right: False
25 | axes.edgecolor: black
26 | axes.axisbelow: True
27 | axes.grid: False
28 | axes.prop_cycle: cycler('color', ['E69F00', '56B4E9', '009E73', 'F0E442', '0072B2', 'D55E00', 'CC79A7'])
29 |
30 | # Tick properties
31 | xtick.major.size: 6
32 | xtick.minor.size: 4
33 | xtick.major.width: 1.5
34 | xtick.minor.width: 1.0
35 | xtick.labelsize: 12
36 | xtick.direction: out
37 | ytick.major.size: 6
38 | ytick.minor.size: 4
39 | ytick.major.width: 1.5
40 | ytick.minor.width: 1.0
41 | ytick.labelsize: 12
42 | ytick.direction: out
43 |
44 | # Line properties
45 | lines.linewidth: 2.5
46 | lines.markersize: 8
47 | lines.markeredgewidth: 1.0
48 |
49 | # Legend properties
50 | legend.fontsize: 12
51 | legend.frameon: False
52 |
53 | # Save properties
54 | savefig.dpi: 300
55 | savefig.format: png
56 | savefig.bbox: tight
57 | savefig.pad_inches: 0.1
58 | savefig.facecolor: white
59 |
60 | # Image properties
61 | image.cmap: viridis
62 |
--------------------------------------------------------------------------------
/scientific-skills/scientific-visualization/assets/nature.mplstyle:
--------------------------------------------------------------------------------
1 | # Nature journal style
2 | # Usage: plt.style.use('nature.mplstyle')
3 | #
4 | # Optimized for Nature journal specifications:
5 | # - Single column: 89 mm
6 | # - Double column: 183 mm
7 | # - High resolution requirements
8 |
9 | # Figure properties
10 | figure.dpi: 100
11 | figure.facecolor: white
12 | figure.constrained_layout.use: True
13 | figure.figsize: 3.5, 2.625 # 89 mm single column, 3:4 aspect
14 |
15 | # Font properties (Nature prefers smaller fonts)
16 | font.size: 7
17 | font.family: sans-serif
18 | font.sans-serif: Arial, Helvetica
19 |
20 | # Axes properties
21 | axes.linewidth: 0.5
22 | axes.labelsize: 8
23 | axes.titlesize: 8
24 | axes.labelweight: normal
25 | axes.spines.top: False
26 | axes.spines.right: False
27 | axes.edgecolor: black
28 | axes.axisbelow: True
29 | axes.grid: False
30 | axes.prop_cycle: cycler('color', ['E69F00', '56B4E9', '009E73', 'F0E442', '0072B2', 'D55E00', 'CC79A7'])
31 |
32 | # Tick properties
33 | xtick.major.size: 2.5
34 | xtick.minor.size: 1.5
35 | xtick.major.width: 0.5
36 | xtick.minor.width: 0.4
37 | xtick.labelsize: 6
38 | xtick.direction: out
39 | ytick.major.size: 2.5
40 | ytick.minor.size: 1.5
41 | ytick.major.width: 0.5
42 | ytick.minor.width: 0.4
43 | ytick.labelsize: 6
44 | ytick.direction: out
45 |
46 | # Line properties
47 | lines.linewidth: 1.2
48 | lines.markersize: 3
49 | lines.markeredgewidth: 0.4
50 |
51 | # Legend properties
52 | legend.fontsize: 6
53 | legend.frameon: False
54 |
55 | # Save properties (Nature requirements)
56 | savefig.dpi: 600 # 1000 for line art, 600 for combination
57 | savefig.format: pdf
58 | savefig.bbox: tight
59 | savefig.pad_inches: 0.05
60 | savefig.facecolor: white
61 |
62 | # Image properties
63 | image.cmap: viridis
64 |
--------------------------------------------------------------------------------
/scientific-skills/deeptools/assets/quick_reference.md:
--------------------------------------------------------------------------------
1 | # deepTools Quick Reference
2 |
3 | ## Most Common Commands
4 |
5 | ### BAM to bigWig (normalized)
6 | ```bash
7 | bamCoverage --bam input.bam --outFileName output.bw \
8 | --normalizeUsing RPGC --effectiveGenomeSize 2913022398 \
9 | --binSize 10 --numberOfProcessors 8
10 | ```
11 |
12 | ### Compare two BAM files
13 | ```bash
14 | bamCompare -b1 treatment.bam -b2 control.bam -o ratio.bw \
15 | --operation log2 --scaleFactorsMethod readCount
16 | ```
17 |
18 | ### Correlation heatmap
19 | ```bash
20 | multiBamSummary bins --bamfiles *.bam -o counts.npz
21 | plotCorrelation -in counts.npz --corMethod pearson \
  --whatToPlot heatmap -o correlation.png
23 | ```
24 |
25 | ### Heatmap around TSS
26 | ```bash
27 | computeMatrix reference-point -S signal.bw -R genes.bed \
28 | -b 3000 -a 3000 --referencePoint TSS -o matrix.gz
29 |
30 | plotHeatmap -m matrix.gz -o heatmap.png
31 | ```
32 |
33 | ### ChIP enrichment check
34 | ```bash
35 | plotFingerprint -b input.bam chip.bam -o fingerprint.png \
36 | --extendReads 200 --ignoreDuplicates
37 | ```
38 |
39 | ## Effective Genome Sizes
40 |
41 | | Organism | Assembly | Size |
42 | |----------|----------|------|
43 | | Human | hg38 | 2913022398 |
44 | | Mouse | mm10 | 2652783500 |
45 | | Fly | dm6 | 142573017 |
46 |
47 | ## Common Normalization Methods
48 |
49 | - **RPGC**: 1× genome coverage (requires --effectiveGenomeSize)
50 | - **CPM**: Counts per million (for fixed bins)
51 | - **RPKM**: Reads per kb per million (for genes)
52 |
53 | ## Typical Workflow
54 |
55 | 1. **QC**: plotFingerprint, plotCorrelation
56 | 2. **Coverage**: bamCoverage with normalization
57 | 3. **Comparison**: bamCompare for treatment vs control
58 | 4. **Visualization**: computeMatrix → plotHeatmap/plotProfile
59 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pdf/scripts/create_validation_image.py:
--------------------------------------------------------------------------------
1 | import json
2 | import sys
3 |
4 | from PIL import Image, ImageDraw
5 |
6 |
7 | # Creates "validation" images with rectangles for the bounding box information that
8 | # Claude creates when determining where to add text annotations in PDFs. See forms.md.
9 |
10 |
def create_validation_image(page_number, fields_json_path, input_path, output_path):
    """Draw bounding-box rectangles from a fields.json file onto a page image.

    Entry boxes are outlined in red and label boxes in blue, so the boxes
    Claude proposed for PDF form annotation can be visually verified
    (fields.json format is described in forms.md).
    """
    with open(fields_json_path, 'r') as f:
        data = json.load(f)

    img = Image.open(input_path)
    draw = ImageDraw.Draw(img)
    num_boxes = 0

    for field in data["form_fields"]:
        # Only draw fields that belong to the requested page.
        if field["page_number"] != page_number:
            continue
        # Red marks where text gets entered; blue marks the field's label.
        draw.rectangle(field['entry_bounding_box'], outline='red', width=2)
        draw.rectangle(field['label_bounding_box'], outline='blue', width=2)
        num_boxes += 2

    img.save(output_path)
    print(f"Created validation image at {output_path} with {num_boxes} bounding boxes")
31 |
32 |
if __name__ == "__main__":
    if len(sys.argv) != 5:
        print("Usage: create_validation_image.py [page number] [fields.json file] [input image path] [output image path]")
        sys.exit(1)
    # Unpack positional CLI arguments; the page number is the only non-path.
    _, page_arg, fields_path, image_in, image_out = sys.argv
    create_validation_image(int(page_arg), fields_path, image_in, image_out)
42 |
--------------------------------------------------------------------------------
/scientific-skills/scientific-visualization/assets/publication.mplstyle:
--------------------------------------------------------------------------------
1 | # Publication-quality matplotlib style
2 | # Usage: plt.style.use('publication.mplstyle')
3 | #
4 | # This style provides clean, professional formatting suitable
5 | # for most scientific journals
6 |
7 | # Figure properties
8 | figure.dpi: 100
9 | figure.facecolor: white
10 | figure.autolayout: False
11 | figure.constrained_layout.use: True
12 | figure.figsize: 3.5, 2.5
13 |
14 | # Font properties
15 | font.size: 8
16 | font.family: sans-serif
17 | font.sans-serif: Arial, Helvetica, DejaVu Sans
18 |
19 | # Axes properties
20 | axes.linewidth: 0.5
21 | axes.labelsize: 9
22 | axes.titlesize: 9
23 | axes.labelweight: normal
24 | axes.spines.top: False
25 | axes.spines.right: False
26 | axes.spines.left: True
27 | axes.spines.bottom: True
28 | axes.edgecolor: black
29 | axes.labelcolor: black
30 | axes.axisbelow: True
31 | axes.grid: False
32 | axes.prop_cycle: cycler('color', ['E69F00', '56B4E9', '009E73', 'F0E442', '0072B2', 'D55E00', 'CC79A7', '000000'])
33 |
34 | # Tick properties
35 | xtick.major.size: 3
36 | xtick.minor.size: 2
37 | xtick.major.width: 0.5
38 | xtick.minor.width: 0.5
39 | xtick.labelsize: 7
40 | xtick.direction: out
41 | ytick.major.size: 3
42 | ytick.minor.size: 2
43 | ytick.major.width: 0.5
44 | ytick.minor.width: 0.5
45 | ytick.labelsize: 7
46 | ytick.direction: out
47 |
48 | # Line properties
49 | lines.linewidth: 1.5
50 | lines.markersize: 4
51 | lines.markeredgewidth: 0.5
52 |
53 | # Legend properties
54 | legend.fontsize: 7
55 | legend.frameon: False
56 | legend.loc: best
57 |
58 | # Save properties
59 | savefig.dpi: 300
60 | savefig.format: pdf
61 | savefig.bbox: tight
62 | savefig.pad_inches: 0.05
63 | savefig.transparent: False
64 | savefig.facecolor: white
65 |
66 | # Image properties
67 | image.cmap: viridis
68 | image.aspect: auto
69 |
--------------------------------------------------------------------------------
/scientific-skills/opentrons-integration/scripts/basic_protocol_template.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Basic Opentrons Protocol Template
4 |
5 | This template provides a minimal starting point for creating Opentrons protocols.
6 | Replace the placeholder values and add your specific protocol logic.
7 | """
8 |
9 | from opentrons import protocol_api
10 |
# Metadata
metadata = {
    'protocolName': 'Basic Protocol Template',
    'author': 'Your Name ',
    'description': 'A basic protocol template for Opentrons',
}

# Requirements
# NOTE: the API level must be declared in exactly one of `metadata` or
# `requirements` -- declaring it in both causes the Opentrons runtime to
# reject the protocol, so it lives here alongside the robot type.
requirements = {
    'robotType': 'Flex',  # or 'OT-2'
    'apiLevel': '2.19'
}
24 |
def run(protocol: protocol_api.ProtocolContext):
    """
    Main protocol function.

    Args:
        protocol: The protocol context provided by Opentrons
    """

    # Load tip racks
    tips_200 = protocol.load_labware('opentrons_flex_96_tiprack_200ul', 'D1')

    # Load labware (both plates use the same 96-well NEST definition)
    source_plate = protocol.load_labware(
        'nest_96_wellplate_200ul_flat',
        'D2',
        label='Source Plate'
    )

    dest_plate = protocol.load_labware(
        'nest_96_wellplate_200ul_flat',
        'D3',
        label='Destination Plate'
    )

    # Load pipette
    # NOTE(review): 'p300_single_flex' does not match the Flex instrument
    # naming scheme (Flex pipettes load as e.g. 'flex_1channel_1000');
    # confirm this load name against the Opentrons instrument registry.
    pipette = protocol.load_instrument(
        'p300_single_flex',
        'left',
        tip_racks=[tips_200]
    )

    # Protocol commands
    protocol.comment('Starting protocol...')

    # Example: Transfer from A1 to B1, picking up a fresh tip for the transfer
    pipette.transfer(
        volume=50,
        source=source_plate['A1'],
        dest=dest_plate['B1'],
        new_tip='always'
    )

    protocol.comment('Protocol complete!')
68 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/microsoft/wml-cex-2018.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/microsoft/wml-cex-2018.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/scientific-skills/pymoo/scripts/single_objective_example.py:
--------------------------------------------------------------------------------
1 | """
2 | Single-objective optimization example using pymoo.
3 |
4 | This script demonstrates basic single-objective optimization
5 | using the Genetic Algorithm on the Sphere function.
6 | """
7 |
8 | from pymoo.algorithms.soo.nonconvex.ga import GA
9 | from pymoo.problems import get_problem
10 | from pymoo.optimize import minimize
11 | from pymoo.operators.crossover.sbx import SBX
12 | from pymoo.operators.mutation.pm import PM
13 | from pymoo.operators.sampling.rnd import FloatRandomSampling
14 | from pymoo.termination import get_termination
15 | import numpy as np
16 |
17 |
def run_single_objective_optimization():
    """Minimize the 10-variable Sphere function with a genetic algorithm."""

    # Sphere benchmark: f(x) = sum(x_i^2), global minimum 0 at the origin.
    problem = get_problem("sphere", n_var=10)

    # GA with SBX crossover and polynomial mutation on real-valued variables.
    algorithm = GA(
        pop_size=100,
        sampling=FloatRandomSampling(),
        crossover=SBX(prob=0.9, eta=15),
        mutation=PM(eta=20),
        eliminate_duplicates=True,
    )

    # Stop after a fixed number of generations.
    termination = get_termination("n_gen", 100)

    result = minimize(problem, algorithm, termination, seed=1, verbose=True)

    # Report the outcome.
    banner = "=" * 60
    print("\n" + banner)
    print("OPTIMIZATION RESULTS")
    print(banner)
    print(f"Best solution: {result.X}")
    print(f"Best objective value: {result.F[0]:.6f}")
    print(f"Number of generations: {result.algorithm.n_gen}")
    print(f"Number of function evaluations: {result.algorithm.evaluator.n_eval}")
    print(banner)

    return result
56 |
57 |
# Run the example when executed as a script (kept importable as a module).
if __name__ == "__main__":
    result = run_single_objective_optimization()
60 |
--------------------------------------------------------------------------------
/scientific-skills/geopandas/references/data-structures.md:
--------------------------------------------------------------------------------
1 | # GeoPandas Data Structures
2 |
3 | ## GeoSeries
4 |
5 | A GeoSeries is a vector where each entry is a set of shapes corresponding to one observation (similar to a pandas Series but with geometric data).
6 |
7 | ```python
8 | import geopandas as gpd
9 | from shapely.geometry import Point, Polygon
10 |
11 | # Create a GeoSeries from geometries
12 | points = gpd.GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
13 |
14 | # Access geometric properties
15 | points.area
16 | points.length
17 | points.bounds
18 | ```
19 |
20 | ## GeoDataFrame
21 |
22 | A GeoDataFrame is a tabular data structure that contains a GeoSeries (similar to a pandas DataFrame but with geographic data).
23 |
24 | ```python
25 | # Create from dictionary
26 | gdf = gpd.GeoDataFrame({
27 | 'name': ['Point A', 'Point B'],
28 | 'value': [100, 200],
29 | 'geometry': [Point(1, 1), Point(2, 2)]
30 | })
31 |
32 | # Create from pandas DataFrame with coordinates
33 | import pandas as pd
34 | df = pd.DataFrame({'x': [1, 2, 3], 'y': [1, 2, 3], 'name': ['A', 'B', 'C']})
35 | gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.x, df.y))
36 | ```
37 |
38 | ## Key Properties
39 |
40 | - **geometry**: The active geometry column (can have multiple geometry columns)
41 | - **crs**: Coordinate reference system
42 | - **bounds**: Bounding box of all geometries
43 | - **total_bounds**: Overall bounding box
44 |
45 | ## Setting Active Geometry
46 |
47 | When a GeoDataFrame has multiple geometry columns:
48 |
49 | ```python
50 | # Set active geometry column
51 | gdf = gdf.set_geometry('other_geom_column')
52 |
53 | # Check active geometry column
54 | gdf.geometry.name
55 | ```
56 |
57 | ## Indexing and Selection
58 |
59 | Use standard pandas indexing with spatial data:
60 |
61 | ```python
62 | # Select by label
63 | gdf.loc[0]
64 |
65 | # Boolean indexing
66 | large_areas = gdf[gdf.area > 100]
67 |
68 | # Select columns
69 | gdf[['name', 'geometry']]
70 | ```
71 |
--------------------------------------------------------------------------------
/scientific-skills/pymoo/scripts/multi_objective_example.py:
--------------------------------------------------------------------------------
1 | """
2 | Multi-objective optimization example using pymoo.
3 |
4 | This script demonstrates multi-objective optimization using
5 | NSGA-II on the ZDT1 benchmark problem.
6 | """
7 |
8 | from pymoo.algorithms.moo.nsga2 import NSGA2
9 | from pymoo.problems import get_problem
10 | from pymoo.optimize import minimize
11 | from pymoo.visualization.scatter import Scatter
12 | import matplotlib.pyplot as plt
13 |
14 |
def run_multi_objective_optimization():
    """Solve the bi-objective ZDT1 benchmark with NSGA-II and plot the front."""

    # ZDT1: classic two-objective benchmark with a known Pareto front.
    problem = get_problem("zdt1")

    algorithm = NSGA2(pop_size=100, eliminate_duplicates=True)

    result = minimize(
        problem,
        algorithm,
        ('n_gen', 200),
        seed=1,
        verbose=True,
    )

    # Summarize the run.
    banner = "=" * 60
    print("\n" + banner)
    print("MULTI-OBJECTIVE OPTIMIZATION RESULTS")
    print(banner)
    print(f"Number of solutions in Pareto front: {len(result.F)}")
    print(f"Number of generations: {result.algorithm.n_gen}")
    print(f"Number of function evaluations: {result.algorithm.evaluator.n_eval}")
    print("\nFirst 5 solutions (decision variables):")
    print(result.X[:5])
    print("\nFirst 5 solutions (objective values):")
    print(result.F[:5])
    print(banner)

    # Overlay the obtained front on the analytical Pareto front.
    plot = Scatter(title="ZDT1 - NSGA-II Results")
    plot.add(result.F, color="red", alpha=0.7, s=30, label="Obtained Pareto Front")
    plot.add(problem.pareto_front(), color="black", alpha=0.3, label="True Pareto Front")
    plot.show()

    return result
60 |
61 |
# Run the example when executed as a script (kept importable as a module).
if __name__ == "__main__":
    result = run_multi_objective_optimization()
64 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
33 |
34 |
35 |
36 |
37 |
38 |
40 |
41 |
42 |
43 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
33 |
34 |
35 |
36 |
37 |
38 |
40 |
41 |
42 |
43 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/scripts/validate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Command line tool to validate Office document XML files against XSD schemas and tracked changes.
4 |
5 | Usage:
    python validate.py <unpacked_dir> --original <original_file>
7 | """
8 |
9 | import argparse
10 | import sys
11 | from pathlib import Path
12 |
13 | from validation import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
14 |
15 |
16 | def main():
17 | parser = argparse.ArgumentParser(description="Validate Office document XML files")
18 | parser.add_argument(
19 | "unpacked_dir",
20 | help="Path to unpacked Office document directory",
21 | )
22 | parser.add_argument(
23 | "--original",
24 | required=True,
25 | help="Path to original file (.docx/.pptx/.xlsx)",
26 | )
27 | parser.add_argument(
28 | "-v",
29 | "--verbose",
30 | action="store_true",
31 | help="Enable verbose output",
32 | )
33 | args = parser.parse_args()
34 |
35 | # Validate paths
36 | unpacked_dir = Path(args.unpacked_dir)
37 | original_file = Path(args.original)
38 | file_extension = original_file.suffix.lower()
39 | assert unpacked_dir.is_dir(), f"Error: {unpacked_dir} is not a directory"
40 | assert original_file.is_file(), f"Error: {original_file} is not a file"
41 | assert file_extension in [".docx", ".pptx", ".xlsx"], (
42 | f"Error: {original_file} must be a .docx, .pptx, or .xlsx file"
43 | )
44 |
45 | # Run validations
46 | match file_extension:
47 | case ".docx":
48 | validators = [DOCXSchemaValidator, RedliningValidator]
49 | case ".pptx":
50 | validators = [PPTXSchemaValidator]
51 | case _:
52 | print(f"Error: Validation not supported for file type {file_extension}")
53 | sys.exit(1)
54 |
55 | # Run validators
56 | success = True
57 | for V in validators:
58 | validator = V(unpacked_dir, original_file, verbose=args.verbose)
59 | if not validator.validate():
60 | success = False
61 |
62 | if success:
63 | print("All validations PASSED!")
64 |
65 | sys.exit(0 if success else 1)
66 |
67 |
# Script entry point; the module stays importable without side effects.
if __name__ == "__main__":
    main()
70 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/scripts/validate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Command line tool to validate Office document XML files against XSD schemas and tracked changes.
4 |
5 | Usage:
    python validate.py <unpacked_dir> --original <original_file>
7 | """
8 |
9 | import argparse
10 | import sys
11 | from pathlib import Path
12 |
13 | from validation import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
14 |
15 |
16 | def main():
17 | parser = argparse.ArgumentParser(description="Validate Office document XML files")
18 | parser.add_argument(
19 | "unpacked_dir",
20 | help="Path to unpacked Office document directory",
21 | )
22 | parser.add_argument(
23 | "--original",
24 | required=True,
25 | help="Path to original file (.docx/.pptx/.xlsx)",
26 | )
27 | parser.add_argument(
28 | "-v",
29 | "--verbose",
30 | action="store_true",
31 | help="Enable verbose output",
32 | )
33 | args = parser.parse_args()
34 |
35 | # Validate paths
36 | unpacked_dir = Path(args.unpacked_dir)
37 | original_file = Path(args.original)
38 | file_extension = original_file.suffix.lower()
39 | assert unpacked_dir.is_dir(), f"Error: {unpacked_dir} is not a directory"
40 | assert original_file.is_file(), f"Error: {original_file} is not a file"
41 | assert file_extension in [".docx", ".pptx", ".xlsx"], (
42 | f"Error: {original_file} must be a .docx, .pptx, or .xlsx file"
43 | )
44 |
45 | # Run validations
46 | match file_extension:
47 | case ".docx":
48 | validators = [DOCXSchemaValidator, RedliningValidator]
49 | case ".pptx":
50 | validators = [PPTXSchemaValidator]
51 | case _:
52 | print(f"Error: Validation not supported for file type {file_extension}")
53 | sys.exit(1)
54 |
55 | # Run validators
56 | success = True
57 | for V in validators:
58 | validator = V(unpacked_dir, original_file, verbose=args.verbose)
59 | if not validator.validate():
60 | success = False
61 |
62 | if success:
63 | print("All validations PASSED!")
64 |
65 | sys.exit(0 if success else 1)
66 |
67 |
# Script entry point; the module stays importable without side effects.
if __name__ == "__main__":
    main()
70 |
--------------------------------------------------------------------------------
/scientific-skills/modal/references/getting-started.md:
--------------------------------------------------------------------------------
1 | # Getting Started with Modal
2 |
3 | ## Sign Up
4 |
5 | Sign up for free at https://modal.com and get $30/month of credits.
6 |
7 | ## Authentication
8 |
9 | Set up authentication using the Modal CLI:
10 |
11 | ```bash
12 | modal token new
13 | ```
14 |
15 | This creates credentials in `~/.modal.toml`. Alternatively, set environment variables:
16 | - `MODAL_TOKEN_ID`
17 | - `MODAL_TOKEN_SECRET`
18 |
19 | ## Basic Concepts
20 |
21 | ### Modal is Serverless
22 |
23 | Modal is a serverless platform - only pay for resources used and spin up containers on demand in seconds.
24 |
25 | ### Core Components
26 |
27 | **App**: Represents an application running on Modal, grouping one or more Functions for atomic deployment.
28 |
29 | **Function**: Acts as an independent unit that scales up and down independently. No containers run (and no charges) when there are no live inputs.
30 |
31 | **Image**: The environment code runs in - a container snapshot with dependencies installed.
32 |
33 | ## First Modal App
34 |
35 | Create a file `hello_modal.py`:
36 |
37 | ```python
38 | import modal
39 |
40 | app = modal.App(name="hello-modal")
41 |
42 | @app.function()
43 | def hello():
44 | print("Hello from Modal!")
45 | return "success"
46 |
47 | @app.local_entrypoint()
48 | def main():
49 | hello.remote()
50 | ```
51 |
52 | Run with:
53 | ```bash
54 | modal run hello_modal.py
55 | ```
56 |
57 | ## Running Apps
58 |
59 | ### Ephemeral Apps (Development)
60 |
61 | Run temporarily with `modal run`:
62 | ```bash
63 | modal run script.py
64 | ```
65 |
66 | The app stops when the script exits. Use `--detach` to keep running after client exits.
67 |
68 | ### Deployed Apps (Production)
69 |
70 | Deploy persistently with `modal deploy`:
71 | ```bash
72 | modal deploy script.py
73 | ```
74 |
75 | View deployed apps at https://modal.com/apps or with:
76 | ```bash
77 | modal app list
78 | ```
79 |
80 | Stop deployed apps:
81 | ```bash
82 | modal app stop app-name
83 | ```
84 |
85 | ## Key Features
86 |
87 | - **Fast prototyping**: Write Python, run on GPUs in seconds
88 | - **Serverless APIs**: Create web endpoints with a decorator
89 | - **Scheduled jobs**: Run cron jobs in the cloud
90 | - **GPU inference**: Access T4, L4, A10, A100, H100, H200, B200 GPUs
91 | - **Distributed volumes**: Persistent storage for ML models
92 | - **Sandboxes**: Secure containers for untrusted code
93 |
--------------------------------------------------------------------------------
/scientific-skills/pymoo/scripts/many_objective_example.py:
--------------------------------------------------------------------------------
1 | """
2 | Many-objective optimization example using pymoo.
3 |
4 | This script demonstrates many-objective optimization (4+ objectives)
5 | using NSGA-III on the DTLZ2 benchmark problem.
6 | """
7 |
8 | from pymoo.algorithms.moo.nsga3 import NSGA3
9 | from pymoo.problems import get_problem
10 | from pymoo.optimize import minimize
11 | from pymoo.util.ref_dirs import get_reference_directions
12 | from pymoo.visualization.pcp import PCP
13 | import numpy as np
14 |
15 |
def run_many_objective_optimization():
    """Optimize the 5-objective DTLZ2 benchmark with NSGA-III and show a PCP."""

    # DTLZ2 scaled up to five objectives.
    n_obj = 5
    problem = get_problem("dtlz2", n_obj=n_obj)

    # NSGA-III needs reference directions; Das-Dennis produces a uniform
    # spread over the objective simplex.
    ref_dirs = get_reference_directions("das-dennis", n_obj, n_partitions=12)
    print(f"Number of reference directions: {len(ref_dirs)}")

    algorithm = NSGA3(ref_dirs=ref_dirs, eliminate_duplicates=True)

    result = minimize(
        problem,
        algorithm,
        ('n_gen', 300),
        seed=1,
        verbose=True,
    )

    # Summarize the run.
    banner = "=" * 60
    print("\n" + banner)
    print("MANY-OBJECTIVE OPTIMIZATION RESULTS")
    print(banner)
    print(f"Number of objectives: {n_obj}")
    print(f"Number of solutions: {len(result.F)}")
    print(f"Number of generations: {result.algorithm.n_gen}")
    print(f"Number of function evaluations: {result.algorithm.evaluator.n_eval}")

    print("\nObjective space statistics:")
    print(f"Minimum values per objective: {result.F.min(axis=0)}")
    print(f"Maximum values per objective: {result.F.max(axis=0)}")
    print(banner)

    # Parallel-coordinate plot: one axis per objective, normalized.
    plot = PCP(
        title=f"DTLZ2 ({n_obj} objectives) - NSGA-III Results",
        labels=[f"f{i+1}" for i in range(n_obj)],
        normalize_each_axis=True,
    )
    plot.add(result.F, alpha=0.3, color="blue")
    plot.show()

    return result
69 |
70 |
# Run the example when executed as a script (kept importable as a module).
if __name__ == "__main__":
    result = run_many_objective_optimization()
73 |
--------------------------------------------------------------------------------
/scientific-skills/clinical-reports/scripts/compliance_checker.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Check clinical reports for regulatory compliance (HIPAA, GCP, FDA).
4 |
5 | Usage:
6 | python compliance_checker.py
7 | """
8 |
import argparse
import json
import re
import sys
12 |
13 |
# Regex checks per regulation: regulation -> {check_name: pattern}.
# A report passes a check when its pattern matches anywhere in the text;
# matching is case-insensitive via the inline (?i) flags.
COMPLIANCE_CHECKS = {
    "hipaa": {
        # Statement that informed/written consent was obtained.
        "consent_statement": r"(?i)(informed\s+consent|written\s+consent).*obtained",
        # Mention of de-identification or anonymisation (either spelling).
        "deidentification": r"(?i)(de-identif|anonymi[sz])",
    },
    "gcp": {
        # IRB/IEC/ethics-committee approval language.
        "irb_approval": r"(?i)(IRB|IEC|ethics\s+committee).*approv",
        "protocol_compliance": r"(?i)protocol",
        "informed_consent": r"(?i)informed\s+consent",
    },
    "fda": {
        # Study identifier such as "IND number: 12345" (colon required).
        "study_id": r"(?i)(IND|IDE|protocol)\s+(number|#)[:]\s*\S+",
        # Adverse-event / SAE safety-reporting language.
        "safety_reporting": r"(?i)(adverse\s+event|SAE)",
    }
}
29 |
30 |
def check_compliance(filename: str) -> dict:
    """Run every regulatory pattern check against a report file.

    Returns ``{"filename": ..., "compliance": {regulation: {check: bool}}}``
    where each bool is True when that check's pattern matched the text.
    """
    with open(filename, 'r', encoding='utf-8') as handle:
        text = handle.read()

    # Nested comprehension mirrors the COMPLIANCE_CHECKS structure exactly.
    outcome = {
        regulation: {
            name: re.search(pattern, text) is not None
            for name, pattern in checks.items()
        }
        for regulation, checks in COMPLIANCE_CHECKS.items()
    }

    return {"filename": filename, "compliance": outcome}
44 |
45 |
def main():
    """CLI entry point: run the checks and print text or JSON results."""
    parser = argparse.ArgumentParser(description="Check regulatory compliance")
    parser.add_argument("input_file", help="Path to clinical report")
    parser.add_argument("--json", action="store_true")
    args = parser.parse_args()

    try:
        report = check_compliance(args.input_file)

        if args.json:
            print(json.dumps(report, indent=2))
        else:
            print("\nRegulatory Compliance Check:\n")
            for regulation, outcome in report["compliance"].items():
                print(f"{regulation.upper()}:")
                for name, ok in outcome.items():
                    mark = "✓" if ok else "✗"
                    print(f"  {mark} {name}")
                print()

        return 0
    except Exception as exc:
        # Best-effort CLI: report the failure and signal it via exit code.
        print(f"Error: {exc}")
        return 1
73 |
74 |
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status
    # (equivalent to sys.exit(main()): both raise SystemExit).
    raise SystemExit(main())
78 |
79 |
--------------------------------------------------------------------------------
/scientific-skills/qiskit/references/setup.md:
--------------------------------------------------------------------------------
1 | # Qiskit Setup and Installation
2 |
3 | ## Installation
4 |
5 | Install Qiskit using uv:
6 |
7 | ```bash
8 | uv pip install qiskit
9 | ```
10 |
11 | For visualization capabilities:
12 |
13 | ```bash
14 | uv pip install "qiskit[visualization]" matplotlib
15 | ```
16 |
17 | ## Python Environment Setup
18 |
19 | Create and activate a virtual environment to isolate dependencies:
20 |
21 | ```bash
22 | # macOS/Linux
23 | python3 -m venv .venv
24 | source .venv/bin/activate
25 |
26 | # Windows
27 | python -m venv .venv
28 | .venv\Scripts\activate
29 | ```
30 |
31 | ## Supported Python Versions
32 |
33 | Check the [Qiskit PyPI page](https://pypi.org/project/qiskit/) for currently supported Python versions. As of 2025, Qiskit typically supports Python 3.8+.
34 |
35 | ## IBM Quantum Account Setup
36 |
37 | To run circuits on real IBM Quantum hardware, you need an IBM Quantum account and API token.
38 |
39 | ### Creating an Account
40 |
41 | 1. Visit [IBM Quantum Platform](https://quantum.ibm.com/)
42 | 2. Sign up for a free account
43 | 3. Navigate to your account settings to retrieve your API token
44 |
45 | ### Configuring Authentication
46 |
47 | Save your IBM Quantum credentials:
48 |
49 | ```python
50 | from qiskit_ibm_runtime import QiskitRuntimeService
51 |
52 | # Save credentials (first time only)
53 | QiskitRuntimeService.save_account(
54 | channel="ibm_quantum",
55 | token="YOUR_IBM_QUANTUM_TOKEN"
56 | )
57 |
58 | # Later sessions - load saved credentials
59 | service = QiskitRuntimeService()
60 | ```
61 |
62 | ### Environment Variable Method
63 |
64 | Alternatively, set the API token as an environment variable:
65 |
66 | ```bash
67 | export QISKIT_IBM_TOKEN="YOUR_IBM_QUANTUM_TOKEN"
68 | ```
69 |
70 | ## Local Development (No Account Required)
71 |
72 | You can build and test quantum circuits locally without an IBM Quantum account using simulators:
73 |
74 | ```python
75 | from qiskit import QuantumCircuit
76 | from qiskit.primitives import StatevectorSampler
77 |
78 | qc = QuantumCircuit(2)
79 | qc.h(0)
80 | qc.cx(0, 1)
81 | qc.measure_all()
82 |
83 | # Run locally with simulator
84 | sampler = StatevectorSampler()
85 | result = sampler.run([qc], shots=1024).result()
86 | ```
87 |
88 | ## Verifying Installation
89 |
90 | Test your installation:
91 |
92 | ```python
93 | import qiskit
94 | print(qiskit.__version__)
95 |
96 | from qiskit import QuantumCircuit
97 | qc = QuantumCircuit(2)
98 | print("Qiskit installed successfully!")
99 | ```
100 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/scripts/templates/comments.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/scripts/templates/commentsExtended.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/scripts/templates/commentsIds.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/scripts/templates/commentsExtensible.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
9 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
9 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
--------------------------------------------------------------------------------
/scientific-skills/fluidsim/references/solvers.md:
--------------------------------------------------------------------------------
1 | # FluidSim Solvers
2 |
3 | FluidSim provides multiple solvers for different fluid dynamics equations. All solvers work on periodic domains using pseudospectral methods with FFT.
4 |
5 | ## Available Solvers
6 |
7 | ### 2D Incompressible Navier-Stokes
8 |
9 | **Solver key**: `ns2d`
10 |
11 | **Import**:
12 | ```python
13 | from fluidsim.solvers.ns2d.solver import Simul
14 | # or dynamically
15 | Simul = fluidsim.import_simul_class_from_key("ns2d")
16 | ```
17 |
18 | **Use for**: 2D turbulence studies, vortex dynamics, fundamental fluid flow simulations
19 |
20 | **Key features**: Energy and enstrophy cascades, vorticity dynamics
21 |
22 | ### 3D Incompressible Navier-Stokes
23 |
24 | **Solver key**: `ns3d`
25 |
26 | **Import**:
27 | ```python
28 | from fluidsim.solvers.ns3d.solver import Simul
29 | ```
30 |
31 | **Use for**: 3D turbulence, realistic fluid flow simulations, high-resolution DNS
32 |
33 | **Key features**: Full 3D turbulence dynamics, parallel computing support
34 |
35 | ### Stratified Flows (2D/3D)
36 |
37 | **Solver keys**: `ns2d.strat`, `ns3d.strat`
38 |
39 | **Import**:
40 | ```python
41 | from fluidsim.solvers.ns2d.strat.solver import Simul # 2D
42 | from fluidsim.solvers.ns3d.strat.solver import Simul # 3D
43 | ```
44 |
45 | **Use for**: Oceanic and atmospheric flows, density-driven flows
46 |
47 | **Key features**: Boussinesq approximation, buoyancy effects, constant Brunt-Väisälä frequency
48 |
49 | **Parameters**: Set stratification via `params.N` (Brunt-Väisälä frequency)
50 |
51 | ### Shallow Water Equations
52 |
53 | **Solver key**: `sw1l` (one-layer)
54 |
55 | **Import**:
56 | ```python
57 | from fluidsim.solvers.sw1l.solver import Simul
58 | ```
59 |
60 | **Use for**: Geophysical flows, tsunami modeling, rotating flows
61 |
62 | **Key features**: Rotating frame support, geostrophic balance
63 |
64 | **Parameters**: Set rotation via `params.f` (Coriolis parameter)
65 |
66 | ### Föppl-von Kármán Equations
67 |
68 | **Solver key**: `fvk` (elastic plate equations)
69 |
70 | **Import**:
71 | ```python
72 | from fluidsim.solvers.fvk.solver import Simul
73 | ```
74 |
75 | **Use for**: Elastic plate dynamics, fluid-structure interaction studies
76 |
77 | ## Solver Selection Guide
78 |
79 | Choose a solver based on the physical problem:
80 |
81 | 1. **2D turbulence, quick testing**: Use `ns2d`
82 | 2. **3D flows, realistic simulations**: Use `ns3d`
83 | 3. **Density-stratified flows**: Use `ns2d.strat` or `ns3d.strat`
84 | 4. **Geophysical flows, rotating systems**: Use `sw1l`
85 | 5. **Elastic plates**: Use `fvk`
86 |
87 | ## Modified Versions
88 |
89 | Many solvers have modified versions with additional physics:
90 | - Forcing terms
91 | - Different boundary conditions
92 | - Additional scalar fields
93 |
94 | Check `fluidsim.solvers` module for complete list.
95 |
--------------------------------------------------------------------------------
/scientific-skills/neuropixels-analysis/scripts/export_to_phy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Export sorting results to Phy for manual curation.
4 |
5 | Usage:
6 | python export_to_phy.py metrics/analyzer --output phy_export/
7 | """
8 |
9 | import argparse
10 | from pathlib import Path
11 |
12 | import spikeinterface.full as si
13 | from spikeinterface.exporters import export_to_phy
14 |
15 |
def export_phy(
    analyzer_path: str,
    output_dir: str,
    copy_binary: bool = True,
    compute_amplitudes: bool = True,
    compute_pc_features: bool = True,
    n_jobs: int = -1,
):
    """Export a sorting analyzer to the Phy template-gui format.

    Extensions Phy needs (spike amplitudes, principal components) are
    computed on demand when they are requested but not yet present.
    """
    print(f"Loading analyzer from: {analyzer_path}")
    analyzer = si.load_sorting_analyzer(analyzer_path)

    print(f"Units: {len(analyzer.sorting.unit_ids)}")

    destination = Path(output_dir)

    # Fill in any missing extensions before exporting.
    if compute_amplitudes and analyzer.get_extension('spike_amplitudes') is None:
        print("Computing spike amplitudes...")
        analyzer.compute('spike_amplitudes')
    if compute_pc_features and analyzer.get_extension('principal_components') is None:
        print("Computing principal components...")
        analyzer.compute('principal_components', n_components=5, mode='by_channel_local')

    print(f"Exporting to Phy: {destination}")
    export_to_phy(
        analyzer,
        output_folder=destination,
        copy_binary=copy_binary,
        compute_amplitudes=compute_amplitudes,
        compute_pc_features=compute_pc_features,
        n_jobs=n_jobs,
    )

    print("\nExport complete!")
    print("To open in Phy, run:")
    print(f"  phy template-gui {destination / 'params.py'}")
55 |
56 |
def main():
    """Parse CLI flags and run the Phy export."""
    parser = argparse.ArgumentParser(description='Export to Phy')
    parser.add_argument('analyzer', help='Path to sorting analyzer')
    parser.add_argument('--output', '-o', default='phy_export/', help='Output directory')
    parser.add_argument('--no-binary', action='store_true', help='Skip copying binary file')
    parser.add_argument('--no-amplitudes', action='store_true', help='Skip amplitude computation')
    parser.add_argument('--no-pc', action='store_true', help='Skip PC feature computation')
    parser.add_argument('--n-jobs', type=int, default=-1, help='Number of parallel jobs')
    opts = parser.parse_args()

    # CLI flags are negative ("--no-X"); invert them for the function call.
    export_phy(
        opts.analyzer,
        opts.output,
        copy_binary=not opts.no_binary,
        compute_amplitudes=not opts.no_amplitudes,
        compute_pc_features=not opts.no_pc,
        n_jobs=opts.n_jobs,
    )
76 |
77 |
if __name__ == '__main__':
    # Script entry point.
    main()
80 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd:
--------------------------------------------------------------------------------
1 |
2 |
8 |
10 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd:
--------------------------------------------------------------------------------
1 |
2 |
8 |
10 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/scientific-skills/research-lookup/lookup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Research Lookup Tool for Claude Code
4 | Performs research queries using Perplexity Sonar Pro Search via OpenRouter.
5 | """
6 |
7 | import os
8 | import sys
9 | import json
10 | from typing import Dict, List, Optional
11 |
12 | # Import the main research lookup class
13 | sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'scripts'))
14 | from research_lookup import ResearchLookup
15 |
16 |
def format_response(result: Dict) -> str:
    """Render a research-lookup result dict as display text.

    A failed lookup collapses to a single error line; a successful one gets
    a header block, the response body, any extracted citations, and (when
    present) token-usage information.
    """
    if not result["success"]:
        return f"❌ Research lookup failed: {result['error']}"

    body = result["response"]
    refs = result["citations"]

    # Assemble the output from parts and join once at the end.
    parts = [
        f"""🔍 **Research Results**

**Query:** {result['query']}
**Model:** {result['model']}
**Timestamp:** {result['timestamp']}

---

{body}

"""
    ]

    if refs:
        parts.append(f"\n**Extracted Citations ({len(refs)}):**\n")
        for idx, ref in enumerate(refs, 1):
            # Prefer DOI, then author/year, else the raw citation object.
            if ref.get("doi"):
                parts.append(f"{idx}. DOI: {ref['doi']}\n")
            elif ref.get("authors") and ref.get("year"):
                parts.append(f"{idx}. {ref['authors']} ({ref['year']})\n")
            else:
                parts.append(f"{idx}. {ref}\n")

    usage = result.get("usage")
    if usage:
        parts.append(f"\n**Usage:** {usage.get('total_tokens', 'N/A')} tokens")

    return "".join(parts)
53 |
54 |
def main():
    """Main entry point for Claude Code tool."""
    # Refuse to run without credentials.
    if not os.getenv("OPENROUTER_API_KEY"):
        print("❌ Error: OPENROUTER_API_KEY environment variable not set")
        print("Please set it in your .env file or export it:")
        print("  export OPENROUTER_API_KEY='your_openrouter_api_key'")
        return 1

    # Everything after the script name forms the query.
    if len(sys.argv) < 2:
        print("❌ Error: No query provided")
        print("Usage: python lookup.py 'your research query here'")
        return 1

    query = " ".join(sys.argv[1:])

    try:
        research = ResearchLookup()

        print(f"🔍 Researching: {query}")
        outcome = research.lookup(query)

        print(format_response(outcome))

        # Exit code mirrors the lookup's success flag.
        return 0 if outcome["success"] else 1
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        return 1
90 |
91 |
if __name__ == "__main__":
    # Use sys.exit rather than the bare exit() builtin: exit() is injected
    # by the site module (absent under `python -S`) and is intended for
    # interactive use. sys is already imported at the top of this file.
    sys.exit(main())
94 |
--------------------------------------------------------------------------------
/scientific-skills/gtars/references/tokenizers.md:
--------------------------------------------------------------------------------
1 | # Genomic Tokenizers
2 |
3 | Tokenizers convert genomic regions into discrete tokens for machine learning applications, particularly useful for training genomic deep learning models.
4 |
5 | ## Python API
6 |
7 | ### Creating a Tokenizer
8 |
9 | Load tokenizer configurations from various sources:
10 |
11 | ```python
12 | import gtars
13 |
14 | # From BED file
15 | tokenizer = gtars.tokenizers.TreeTokenizer.from_bed_file("regions.bed")
16 |
17 | # From configuration file
18 | tokenizer = gtars.tokenizers.TreeTokenizer.from_config("tokenizer_config.yaml")
19 |
20 | # From region string
21 | tokenizer = gtars.tokenizers.TreeTokenizer.from_region_string("chr1:1000-2000")
22 | ```
23 |
24 | ### Tokenizing Genomic Regions
25 |
26 | Convert genomic coordinates to tokens:
27 |
28 | ```python
29 | # Tokenize a single region
30 | token = tokenizer.tokenize("chr1", 1000, 2000)
31 |
32 | # Tokenize multiple regions
33 | tokens = []
34 | for chrom, start, end in regions:
35 | token = tokenizer.tokenize(chrom, start, end)
36 | tokens.append(token)
37 | ```
38 |
39 | ### Token Properties
40 |
41 | Access token information:
42 |
43 | ```python
44 | # Get token ID
45 | token_id = token.id
46 |
47 | # Get genomic coordinates
48 | chrom = token.chromosome
49 | start = token.start
50 | end = token.end
51 |
52 | # Get token metadata
53 | metadata = token.metadata
54 | ```
55 |
56 | ## Use Cases
57 |
58 | ### Machine Learning Preprocessing
59 |
60 | Tokenizers are essential for preparing genomic data for ML models:
61 |
62 | 1. **Sequence modeling**: Convert genomic intervals into discrete tokens for transformer models
63 | 2. **Position encoding**: Create consistent positional encodings across datasets
64 | 3. **Data augmentation**: Generate alternative tokenizations for training
65 |
66 | ### Integration with geniml
67 |
68 | The tokenizers module integrates seamlessly with the geniml library for genomic ML:
69 |
70 | ```python
71 | # Tokenize regions for geniml
72 | from gtars.tokenizers import TreeTokenizer
73 | import geniml
74 |
75 | tokenizer = TreeTokenizer.from_bed_file("training_regions.bed")
76 | tokens = [tokenizer.tokenize(r.chrom, r.start, r.end) for r in regions]
77 |
78 | # Use tokens in geniml models
79 | model = geniml.Model(vocab_size=tokenizer.vocab_size)
80 | ```
81 |
82 | ## Configuration Format
83 |
84 | Tokenizer configuration files support YAML format:
85 |
86 | ```yaml
87 | # tokenizer_config.yaml
88 | type: tree
89 | resolution: 1000 # Token resolution in base pairs
90 | chromosomes:
91 | - chr1
92 | - chr2
93 | - chr3
94 | options:
95 | overlap_handling: merge
96 | gap_threshold: 100
97 | ```
98 |
99 | ## Performance Considerations
100 |
101 | - TreeTokenizer uses efficient data structures for fast tokenization
102 | - Batch tokenization is recommended for large datasets
103 | - Pre-loading tokenizers reduces overhead for repeated operations
104 |
--------------------------------------------------------------------------------
/scientific-skills/neuropixels-analysis/scripts/run_sorting.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Run spike sorting on preprocessed recording.
4 |
5 | Usage:
6 | python run_sorting.py preprocessed/ --sorter kilosort4 --output sorting/
7 | """
8 |
9 | import argparse
10 | from pathlib import Path
11 |
12 | import spikeinterface.full as si
13 |
14 |
# Default parameters for each sorter.
# Keys must match the sorter names accepted by si.run_sorter; values are
# keyword overrides layered on top of that sorter's built-in defaults
# (callers can override them again via run_sorting's sorter_params).
SORTER_DEFAULTS = {
    'kilosort4': {
        'batch_size': 30000,
        'nblocks': 1,
        'Th_learned': 8,    # detection thresholds — presumably Kilosort4's
        'Th_universal': 9,  # recommended values; confirm against its docs
    },
    'kilosort3': {
        'do_CAR': False,  # Already done in preprocessing
    },
    'spykingcircus2': {
        'apply_preprocessing': False,
    },
    'mountainsort5': {
        # Filtering/whitening assumed handled upstream — TODO confirm
        # against the preprocessing pipeline.
        'filter': False,
        'whiten': False,
    },
}
34 |
35 |
def run_sorting(
    input_path: str,
    output_dir: str,
    sorter: str = 'kilosort4',
    sorter_params: dict = None,
    n_jobs: int = -1,
):
    """Run a spike sorter on a previously preprocessed recording.

    Merges SORTER_DEFAULTS for the chosen sorter with caller-supplied
    overrides, runs the sorter, saves the sorting, and returns it.

    NOTE(review): n_jobs is accepted but never forwarded to run_sorter in
    this implementation — confirm whether that is intentional.
    """
    print(f"Loading preprocessed recording from: {input_path}")
    recording = si.load_extractor(Path(input_path) / 'preprocessed')

    print(f"Recording: {recording.get_num_channels()} channels, {recording.get_total_duration():.1f}s")

    # Defaults first; explicit overrides win.
    params = {**SORTER_DEFAULTS.get(sorter, {}), **(sorter_params or {})}

    print(f"Running {sorter} with params: {params}")

    out_root = Path(output_dir)

    # Run sorter (note: parameter is 'folder' not 'output_folder' in newer SpikeInterface)
    sorting = si.run_sorter(
        sorter,
        recording,
        folder=out_root / f'{sorter}_output',
        verbose=True,
        **params,
    )

    print("\nSorting complete!")
    print(f"  Units found: {len(sorting.unit_ids)}")
    print(f"  Total spikes: {sum(len(sorting.get_unit_spike_train(uid)) for uid in sorting.unit_ids)}")

    # Persist the sorting object alongside the sorter's own output.
    sorting.save(folder=out_root / 'sorting')
    print(f"  Saved to: {out_root / 'sorting'}")

    return sorting
77 |
78 |
def main():
    """Parse CLI arguments and launch spike sorting."""
    parser = argparse.ArgumentParser(description='Run spike sorting')
    parser.add_argument('input', help='Path to preprocessed recording')
    parser.add_argument('--output', '-o', default='sorting/', help='Output directory')
    parser.add_argument('--sorter', '-s', default='kilosort4',
                        choices=['kilosort4', 'kilosort3', 'spykingcircus2', 'mountainsort5'])
    parser.add_argument('--n-jobs', type=int, default=-1, help='Number of parallel jobs')
    opts = parser.parse_args()

    run_sorting(
        opts.input,
        opts.output,
        sorter=opts.sorter,
        n_jobs=opts.n_jobs,
    )
95 |
96 |
if __name__ == '__main__':
    # Script entry point.
    main()
99 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
39 |
40 |
41 |
42 |
43 |
44 |
47 |
48 |
49 |
50 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
39 |
40 |
41 |
42 |
43 |
44 |
47 |
48 |
49 |
50 |
--------------------------------------------------------------------------------
/scientific-skills/clinical-reports/scripts/extract_clinical_data.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Extract structured clinical data from reports.
4 |
Usage:
    python extract_clinical_data.py <report-file> [--output out.json]
7 | """
8 |
9 | import argparse
10 | import json
11 | import re
12 |
13 |
def extract_vital_signs(content: str) -> dict:
    """Pull vital-sign measurements out of free-text report content.

    Returns a dict keyed by vital name ("temperature", "bp", "hr", "rr",
    "spo2"), holding the first captured value for each pattern that
    matched; vitals with no match are omitted.
    """
    patterns = {
        "temperature": r"(?i)temp(?:erature)?[:]\s*([\d.]+)\s*°?F",
        "bp": r"(?i)BP[:]\s*(\d+/\d+)",
        "hr": r"(?i)HR[:]\s*(\d+)",
        "rr": r"(?i)RR[:]\s*(\d+)",
        "spo2": r"(?i)SpO2[:]\s*([\d.]+)%",
    }

    # Search every pattern, then keep only the vitals that actually matched.
    hits = {name: re.search(rx, content) for name, rx in patterns.items()}
    return {name: m.group(1) for name, m in hits.items() if m}
31 |
32 |
def extract_demographics(content: str) -> dict:
    """Extract patient demographics (age, sex) from report text.

    Returns a dict with "age" and/or "sex" keys when the corresponding
    pattern matches; keys with no match are omitted.
    """
    demographics = {}
    patterns = {
        "age": r"(?i)(\d+)[\s-]year[\s-]old",
        # \b anchors make the single-letter codes match only as standalone
        # words. The previous pattern (male|female|M|F) reported the first
        # bare 'm'/'f' character anywhere in the text (e.g. the "M" in
        # "Medications") as the patient's sex.
        "sex": r"(?i)\b(male|female|M|F)\b",
    }

    for demo, pattern in patterns.items():
        match = re.search(pattern, content)
        if match:
            demographics[demo] = match.group(1)

    return demographics
47 |
48 |
def extract_medications(content: str) -> list:
    """Extract medication orders from report text.

    Matches the common "<drug> <N mg> <route> <frequency>" format and
    returns one dict per match with drug/dose/route/frequency keys.
    """
    # Simple pattern for common medication format
    pattern = r"(?i)(\w+)\s+(\d+\s*mg)\s+(PO|IV|SC)\s+(daily|BID|TID|QID)"
    fields = ("drug", "dose", "route", "frequency")

    return [dict(zip(fields, groups)) for groups in re.findall(pattern, content)]
65 |
66 |
def main():
    """CLI entry point: read a report, extract structured data, emit JSON."""
    parser = argparse.ArgumentParser(description="Extract clinical data")
    parser.add_argument("input_file", help="Path to clinical report")
    parser.add_argument("--output", "-o", help="Output JSON file")
    args = parser.parse_args()

    try:
        with open(args.input_file, 'r', encoding='utf-8') as handle:
            text = handle.read()

        extracted = {
            "demographics": extract_demographics(text),
            "vital_signs": extract_vital_signs(text),
            "medications": extract_medications(text),
        }

        # Write to a file when requested, otherwise print to stdout.
        if args.output:
            with open(args.output, 'w') as sink:
                json.dump(extracted, sink, indent=2)
            print(f"✓ Data extracted to: {args.output}")
        else:
            print(json.dumps(extracted, indent=2))

        return 0
    except Exception as exc:
        # Best-effort CLI: report the failure and signal it via exit code.
        print(f"Error: {exc}")
        return 1
97 |
98 |
if __name__ == "__main__":
    # Exit with main()'s return code (0 = success, 1 = error).
    import sys
    sys.exit(main())
102 |
103 |
--------------------------------------------------------------------------------
/scientific-skills/diffdock/assets/custom_inference_config.yaml:
--------------------------------------------------------------------------------
1 | # DiffDock Custom Inference Configuration Template
2 | # Copy and modify this file to customize inference parameters
3 |
4 | # Model paths (usually don't need to change these)
5 | model_dir: ./workdir/v1.1/score_model
6 | confidence_model_dir: ./workdir/v1.1/confidence_model
7 | ckpt: best_ema_inference_epoch_model.pt
8 | confidence_ckpt: best_model_epoch75.pt
9 |
10 | # Model version flags
11 | old_score_model: false # Set to true to use original DiffDock instead of DiffDock-L
12 | old_filtering_model: true
13 |
14 | # Inference steps
15 | inference_steps: 20 # Increase for potentially better accuracy (e.g., 25-30)
16 | actual_steps: 19
17 | no_final_step_noise: true
18 |
19 | # Sampling parameters
20 | samples_per_complex: 10 # Increase for difficult cases (e.g., 20-40)
21 | sigma_schedule: expbeta
22 | initial_noise_std_proportion: 1.46
23 |
24 | # Temperature controls - Adjust these to balance exploration vs accuracy
25 | # Higher values = more diverse predictions, lower values = more focused predictions
26 |
27 | # Sampling temperatures
28 | temp_sampling_tr: 1.17 # Translation sampling temperature
29 | temp_sampling_rot: 2.06 # Rotation sampling temperature
30 | temp_sampling_tor: 7.04 # Torsion sampling temperature (increase for flexible ligands)
31 |
32 | # Psi angle temperatures
33 | temp_psi_tr: 0.73
34 | temp_psi_rot: 0.90
35 | temp_psi_tor: 0.59
36 |
37 | # Sigma data temperatures
38 | temp_sigma_data_tr: 0.93
39 | temp_sigma_data_rot: 0.75
40 | temp_sigma_data_tor: 0.69
41 |
42 | # Feature flags
43 | no_model: false
44 | no_random: false
45 | ode: false # Set to true to use ODE solver instead of SDE
46 | different_schedules: false
47 | limit_failures: 5
48 |
49 | # Output settings
50 | # save_visualisation: true # Uncomment to save SDF files
51 |
52 | # ============================================================================
53 | # Configuration Presets for Common Use Cases
54 | # ============================================================================
55 |
56 | # PRESET 1: High Accuracy (slower, more thorough)
57 | # samples_per_complex: 30
58 | # inference_steps: 25
59 | # temp_sampling_tr: 1.0
60 | # temp_sampling_rot: 1.8
61 | # temp_sampling_tor: 6.5
62 |
63 | # PRESET 2: Fast Screening (faster, less thorough)
64 | # samples_per_complex: 5
65 | # inference_steps: 15
66 | # temp_sampling_tr: 1.3
67 | # temp_sampling_rot: 2.2
68 | # temp_sampling_tor: 7.5
69 |
70 | # PRESET 3: Flexible Ligands (more conformational diversity)
71 | # samples_per_complex: 20
72 | # inference_steps: 20
73 | # temp_sampling_tr: 1.2
74 | # temp_sampling_rot: 2.1
75 | # temp_sampling_tor: 8.5 # Increased torsion temperature
76 |
77 | # PRESET 4: Rigid Ligands (more focused predictions)
78 | # samples_per_complex: 10
79 | # inference_steps: 20
80 | # temp_sampling_tr: 1.1
81 | # temp_sampling_rot: 2.0
82 | # temp_sampling_tor: 6.0 # Decreased torsion temperature
83 |
84 | # ============================================================================
85 | # Usage Example
86 | # ============================================================================
87 | # python -m inference \
88 | # --config custom_inference_config.yaml \
89 | # --protein_ligand_csv input.csv \
90 | # --out_dir results/
91 |
--------------------------------------------------------------------------------
/scientific-skills/geniml/references/region2vec.md:
--------------------------------------------------------------------------------
1 | # Region2Vec: Genomic Region Embeddings
2 |
3 | ## Overview
4 |
5 | Region2Vec generates unsupervised embeddings of genomic regions and region sets from BED files. It maps genomic regions to a vocabulary, creates sentences through concatenation, and applies word2vec training to learn meaningful representations.
6 |
7 | ## When to Use
8 |
9 | Use Region2Vec when working with:
10 | - BED file collections requiring dimensionality reduction
11 | - Genomic region similarity analysis
12 | - Downstream ML tasks requiring region feature vectors
13 | - Comparative analysis across multiple genomic datasets
14 |
15 | ## Workflow
16 |
17 | ### Step 1: Prepare Data
18 |
19 | Gather BED files in a source folder. Optionally specify a file list (default uses all files in the directory). Prepare a universe file as the reference vocabulary for tokenization.
20 |
21 | ### Step 2: Tokenization
22 |
23 | Run hard tokenization to convert genomic regions into tokens:
24 |
25 | ```python
26 | from geniml.tokenization import hard_tokenization
27 |
28 | src_folder = '/path/to/raw/bed/files'
29 | dst_folder = '/path/to/tokenized_files'
30 | universe_file = '/path/to/universe_file.bed'
31 |
32 | hard_tokenization(src_folder, dst_folder, universe_file, 1e-9)
33 | ```
34 |
35 | The final parameter (1e-9) is the p-value threshold for tokenization overlap significance.
36 |
37 | ### Step 3: Train Region2Vec Model
38 |
39 | Execute Region2Vec training on the tokenized files:
40 |
41 | ```python
42 | from geniml.region2vec import region2vec
43 |
44 | region2vec(
45 | token_folder=dst_folder,
46 | save_dir='./region2vec_model',
47 | num_shufflings=1000,
48 | embedding_dim=100,
49 | context_len=50,
50 | window_size=5,
51 | init_lr=0.025
52 | )
53 | ```
54 |
55 | ## Key Parameters
56 |
57 | | Parameter | Description | Typical Range |
58 | |-----------|-------------|---------------|
59 | | `init_lr` | Initial learning rate | 0.01 - 0.05 |
60 | | `window_size` | Context window size | 3 - 10 |
61 | | `num_shufflings` | Number of shuffling iterations | 500 - 2000 |
62 | | `embedding_dim` | Dimension of output embeddings | 50 - 300 |
63 | | `context_len` | Context length for training | 30 - 100 |
64 |
65 | ## CLI Usage
66 |
67 | ```bash
68 | geniml region2vec --token-folder /path/to/tokens \
69 | --save-dir ./region2vec_model \
70 | --num-shuffle 1000 \
71 | --embed-dim 100 \
72 | --context-len 50 \
73 | --window-size 5 \
74 | --init-lr 0.025
75 | ```
76 |
77 | ## Best Practices
78 |
- **Parameter tuning**: Tune `init_lr`, `window_size`, `num_shufflings`, and `embedding_dim` iteratively for optimal performance on your specific dataset
80 | - **Universe file**: Use a comprehensive universe file that covers all regions of interest in your analysis
81 | - **Validation**: Always validate tokenization output before proceeding to training
82 | - **Resources**: Training can be computationally intensive; monitor memory usage with large datasets
83 |
84 | ## Output
85 |
86 | The trained model saves embeddings that can be used for:
87 | - Similarity searches across genomic regions
88 | - Clustering region sets
89 | - Feature vectors for downstream ML tasks
90 | - Visualization via dimensionality reduction (t-SNE, UMAP)
91 |
--------------------------------------------------------------------------------
/scientific-skills/clinical-reports/scripts/validate_trial_report.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Validate clinical trial reports against ICH-E3 structure.
4 |
5 | Checks Clinical Study Reports (CSR) for ICH-E3 compliance.
6 |
7 | Usage:
8 | python validate_trial_report.py
9 | """
10 |
11 | import argparse
12 | import json
13 | import re
14 | from pathlib import Path
15 |
16 |
# Expected ICH-E3 Clinical Study Report sections, keyed by internal id.
# Only the title text before the parenthetical is used for header matching
# (see validate_ich_e3); the parenthetical is a display hint.
# NOTE(review): the parenthetical numbers look inconsistent with the official
# ICH E3 guideline numbering (e.g. Ethics is Section 5 there) — confirm.
ICH_E3_SECTIONS = {
    "title_page": "Title Page",
    "synopsis": "Synopsis (2)",
    "toc": "Table of Contents (3)",
    "abbreviations": "List of Abbreviations (4)",
    "ethics": "Ethics (Section 2)",
    "investigators": "Investigators and Study Administrative Structure (Section 3)",
    "introduction": "Introduction (Section 4)",
    "objectives": "Study Objectives and Plan (Section 5)",
    "study_patients": "Study Patients (Section 6)",
    "efficacy": "Efficacy Evaluation (Section 7)",
    "safety": "Safety Evaluation (Section 8)",
    "discussion": "Discussion and Overall Conclusions (Section 9)",
    "tables_figures": "Tables, Figures, and Graphs (Section 10)",
    "references": "References (Section 11)",
    "appendices": "Appendices (Section 12-14)",
}


def validate_ich_e3(filename: str) -> dict:
    """Check a Clinical Study Report file for ICH-E3 section headers.

    Scans the file for level-1/level-2 markdown headers matching each
    expected section title (case-insensitively) and computes the fraction
    of sections present.

    Args:
        filename: Path to the CSR text/markdown file.

    Returns:
        dict with the filename, compliance_rate (percent, one decimal),
        a per-section {name, found} breakdown, and a status of "PASS"
        (>= 90% of sections found) or "NEEDS_REVISION".
    """
    with open(filename, 'r', encoding='utf-8') as fh:
        text = fh.read()

    sections = {}
    for key, title in ICH_E3_SECTIONS.items():
        # Match only the title text before any parenthetical numbering,
        # as a '#' or '##' header, ignoring case.
        header = title.split('(')[0].strip()
        hit = re.search(rf"(?i)##?\s*{re.escape(header)}", text) is not None
        sections[key] = {"name": title, "found": hit}

    found = sum(r["found"] for r in sections.values())
    rate = found / len(sections) * 100

    return {
        "filename": filename,
        "compliance_rate": round(rate, 1),
        "sections": sections,
        "status": "PASS" if rate >= 90 else "NEEDS_REVISION",
    }
56 |
57 |
def main():
    """CLI entry point: parse arguments, validate the CSR, print a report.

    Returns:
        Process exit code: 0 when the report PASSes, 1 on NEEDS_REVISION
        or on any error while reading/validating the file.
    """
    parser = argparse.ArgumentParser(description="Validate CSR against ICH-E3")
    parser.add_argument("input_file", help="Path to CSR file")
    parser.add_argument("--json", action="store_true", help="Output JSON")
    args = parser.parse_args()

    try:
        report = validate_ich_e3(args.input_file)

        if args.json:
            print(json.dumps(report, indent=2))
        else:
            print(f"\nICH-E3 Compliance: {report['compliance_rate']}%")
            print(f"Status: {report['status']}\n")
            print("Section Checklist:")
            for details in report["sections"].values():
                symbol = "✓" if details["found"] else "✗"
                print(f"{symbol} {details['name']}")

        return 0 if report["status"] == "PASS" else 1
    except Exception as e:
        # CLI boundary: surface the error and signal failure via exit code.
        print(f"Error: {e}")
        return 1
84 |
85 |
if __name__ == "__main__":
    # Local import keeps `sys` out of module scope when imported as a library.
    import sys
    # Exit code comes from main(): 0 on PASS, 1 otherwise.
    sys.exit(main())
89 |
90 |
--------------------------------------------------------------------------------
/scientific-skills/modal/references/resources.md:
--------------------------------------------------------------------------------
1 | # CPU, Memory, and Disk Resources
2 |
3 | ## Default Resources
4 |
5 | Each Modal container has default reservations:
6 | - **CPU**: 0.125 cores
7 | - **Memory**: 128 MiB
8 |
Containers can exceed these minimum reservations if the worker has spare resources available.
10 |
11 | ## CPU Cores
12 |
13 | Request CPU cores as floating-point number:
14 |
15 | ```python
16 | @app.function(cpu=8.0)
17 | def my_function():
18 | # Guaranteed access to at least 8 physical cores
19 | ...
20 | ```
21 |
22 | Values correspond to physical cores, not vCPUs.
23 |
24 | Modal sets multi-threading environment variables based on CPU reservation:
25 | - `OPENBLAS_NUM_THREADS`
26 | - `OMP_NUM_THREADS`
27 | - `MKL_NUM_THREADS`
28 |
29 | ## Memory
30 |
31 | Request memory in megabytes (integer):
32 |
33 | ```python
34 | @app.function(memory=32768)
35 | def my_function():
36 | # Guaranteed access to at least 32 GiB RAM
37 | ...
38 | ```
39 |
40 | ## Resource Limits
41 |
42 | ### CPU Limits
43 |
44 | Default soft CPU limit: request + 16 cores
45 | - Default request: 0.125 cores → default limit: 16.125 cores
46 | - Above limit, host throttles CPU usage
47 |
48 | Set explicit CPU limit:
49 |
50 | ```python
51 | cpu_request = 1.0
52 | cpu_limit = 4.0
53 |
54 | @app.function(cpu=(cpu_request, cpu_limit))
55 | def f():
56 | ...
57 | ```
58 |
59 | ### Memory Limits
60 |
61 | Set hard memory limit to OOM kill containers at threshold:
62 |
63 | ```python
64 | mem_request = 1024 # MB
65 | mem_limit = 2048 # MB
66 |
67 | @app.function(memory=(mem_request, mem_limit))
68 | def f():
69 | # Container killed if exceeds 2048 MB
70 | ...
71 | ```
72 |
73 | Useful for catching memory leaks early.
74 |
75 | ### Disk Limits
76 |
77 | Running containers have access to many GBs of SSD disk, limited by:
78 | 1. Underlying worker's SSD capacity
79 | 2. Per-container disk quota (100s of GBs)
80 |
81 | Hitting limits causes `OSError` on disk writes.
82 |
83 | Request larger disk with `ephemeral_disk`:
84 |
85 | ```python
86 | @app.function(ephemeral_disk=10240) # 10 GiB
87 | def process_large_files():
88 | ...
89 | ```
90 |
91 | Maximum disk size: 3.0 TiB (3,145,728 MiB)
92 | Intended use: dataset processing
93 |
94 | ## Billing
95 |
96 | Charged based on whichever is higher: reservation or actual usage.
97 |
98 | Disk requests increase memory request at 20:1 ratio:
99 | - Requesting 500 GiB disk → increases memory request to 25 GiB (if not already higher)
100 |
101 | ## Maximum Requests
102 |
103 | Modal enforces maximums at Function creation time. Requests exceeding maximum will be rejected with `InvalidError`.
104 |
105 | Contact support if you need higher limits.
106 |
107 | ## Example: Resource Configuration
108 |
109 | ```python
110 | @app.function(
111 | cpu=4.0, # 4 physical cores
112 | memory=16384, # 16 GiB RAM
113 | ephemeral_disk=51200, # 50 GiB disk
114 | timeout=3600, # 1 hour timeout
115 | )
116 | def process_data():
117 | # Heavy processing with large files
118 | ...
119 | ```
120 |
121 | ## Monitoring Resource Usage
122 |
123 | View resource usage in Modal dashboard:
124 | - CPU utilization
125 | - Memory usage
126 | - Disk usage
127 | - GPU metrics (if applicable)
128 |
129 | Access via https://modal.com/apps
130 |
--------------------------------------------------------------------------------
/scientific-skills/arboreto/scripts/basic_grn_inference.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
"""
Basic GRN inference example using Arboreto.

This script demonstrates the standard workflow for inferring gene regulatory
networks from expression data using GRNBoost2.

Usage:
    python basic_grn_inference.py expression_file output_file [--tf-file TF_FILE] [--seed SEED]

Arguments:
    expression_file: Path to expression matrix (TSV format, genes as columns)
    output_file: Path for output network (TSV format)
    --tf-file: Optional path to transcription factors file (one per line)
    --seed: Random seed for reproducibility (default: 777)
"""
17 |
18 | import argparse
19 | import pandas as pd
20 | from arboreto.algo import grnboost2
21 | from arboreto.utils import load_tf_names
22 |
23 |
def run_grn_inference(expression_file, output_file, tf_file=None, seed=777):
    """
    Infer a gene regulatory network with GRNBoost2 and write it to disk.

    Args:
        expression_file: Path to expression matrix TSV file
        output_file: Path for output network file
        tf_file: Optional path to TF names file
        seed: Random seed for reproducibility
    """
    print(f"Loading expression data from {expression_file}...")
    matrix = pd.read_csv(expression_file, sep='\t')

    n_obs, n_genes = matrix.shape
    print(f"Expression matrix shape: {matrix.shape}")
    print(f"Number of genes: {n_genes}")
    print(f"Number of observations: {n_obs}")

    # Restrict candidate regulators to the given TF list when provided;
    # otherwise every gene is treated as a potential regulator ('all').
    if tf_file:
        print(f"Loading transcription factors from {tf_file}...")
        regulators = load_tf_names(tf_file)
        print(f"Number of TFs: {len(regulators)}")
    else:
        regulators = 'all'

    print(f"Running GRNBoost2 with seed={seed}...")
    links = grnboost2(
        expression_data=matrix,
        tf_names=regulators,
        seed=seed,
        verbose=True
    )

    # Persist as headerless TSV: one (TF, target, importance) link per line.
    print(f"Saving network to {output_file}...")
    links.to_csv(output_file, sep='\t', index=False, header=False)

    print(f"Done! Network contains {len(links)} regulatory links.")
    print("\nTop 10 regulatory links:")
    print(links.head(10).to_string(index=False))
64 |
65 |
if __name__ == '__main__':
    # Command-line interface: two required positional paths plus options.
    parser = argparse.ArgumentParser(
        description='Infer gene regulatory network using GRNBoost2'
    )
    parser.add_argument('expression_file',
                        help='Path to expression matrix (TSV format, genes as columns)')
    parser.add_argument('output_file',
                        help='Path for output network (TSV format)')
    parser.add_argument('--tf-file', default=None,
                        help='Path to transcription factors file (one per line)')
    parser.add_argument('--seed', type=int, default=777,
                        help='Random seed for reproducibility (default: 777)')

    opts = parser.parse_args()

    run_grn_inference(
        expression_file=opts.expression_file,
        output_file=opts.output_file,
        tf_file=opts.tf_file,
        seed=opts.seed
    )
98 |
--------------------------------------------------------------------------------
/scientific-skills/scientific-schematics/example_usage.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Example usage of AI-powered scientific schematic generation
#
# Prerequisites:
# 1. Set OPENROUTER_API_KEY environment variable
# 2. Ensure Python 3.10+ is installed
# 3. Install requests: pip install requests

# Abort immediately if any command fails.
set -e

echo "=========================================="
echo "Scientific Schematics - AI Generation"
echo "Example Usage Demonstrations"
echo "=========================================="
echo ""

# Check for API key
# Fail fast with setup instructions when the key is missing.
if [ -z "$OPENROUTER_API_KEY" ]; then
  echo "❌ Error: OPENROUTER_API_KEY environment variable not set"
  echo ""
  echo "Get an API key at: https://openrouter.ai/keys"
  echo "Then set it with: export OPENROUTER_API_KEY='your_key'"
  exit 1
fi

echo "✓ OPENROUTER_API_KEY is set"
echo ""

# Create output directory
mkdir -p figures
echo "✓ Created figures/ directory"
echo ""

# Example 1: Simple flowchart
# --iterations controls how many generate/review refinement passes run.
# NOTE(review): with --iterations 2, the claim of three versions (v1-v3)
# in the echo below may be off by one — confirm against generate_schematic.py.
echo "Example 1: CONSORT Flowchart"
echo "----------------------------"
python scripts/generate_schematic.py \
  "CONSORT participant flow diagram. Assessed for eligibility (n=500). Excluded (n=150) with reasons: age<18 (n=80), declined (n=50), other (n=20). Randomized (n=350) into Treatment (n=175) and Control (n=175). Lost to follow-up: 15 and 10. Final analysis: 160 and 165." \
  -o figures/consort_example.png \
  --iterations 2

echo ""
echo "✓ Generated: figures/consort_example.png"
echo " - Also created: consort_example_v1.png, v2.png, v3.png"
echo " - Review log: consort_example_review_log.json"
echo ""

# Example 2: Neural network (shorter for demo)
echo "Example 2: Simple Neural Network"
echo "--------------------------------"
python scripts/generate_schematic.py \
  "Simple feedforward neural network diagram. Input layer with 4 nodes, hidden layer with 6 nodes, output layer with 2 nodes. Show all connections. Label layers clearly." \
  -o figures/neural_net_example.png \
  --iterations 2

echo ""
echo "✓ Generated: figures/neural_net_example.png"
echo ""

# Example 3: Biological pathway (minimal)
echo "Example 3: Signaling Pathway"
echo "---------------------------"
python scripts/generate_schematic.py \
  "Simple signaling pathway: Receptor → Kinase A → Kinase B → Transcription Factor → Gene. Show arrows with 'activation' labels. Use different colors for each component." \
  -o figures/pathway_example.png \
  --iterations 2

echo ""
echo "✓ Generated: figures/pathway_example.png"
echo ""

echo "=========================================="
echo "All examples completed successfully!"
echo "=========================================="
echo ""
echo "Generated files in figures/:"
# ls exits non-zero when nothing matched; fall back to a hint instead of
# letting `set -e` kill the script.
ls -lh figures/*example*.png 2>/dev/null || echo " (Files will appear after running with valid API key)"
echo ""
echo "Review the review_log.json files to see:"
echo " - Quality scores for each iteration"
echo " - Detailed critiques and suggestions"
echo " - Improvement progression"
echo ""
echo "Next steps:"
echo " 1. View the generated images"
echo " 2. Review the quality scores in *_review_log.json"
echo " 3. Try your own prompts!"
echo ""
89 |
90 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pdf/scripts/check_bounding_boxes.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | import json
3 | import sys
4 |
5 |
6 | # Script to check that the `fields.json` file that Claude creates when analyzing PDFs
7 | # does not have overlapping bounding boxes. See forms.md.
8 |
9 |
@dataclass
class RectAndField:
    """A bounding box (label or entry) paired with its originating form field."""
    rect: list[float]  # [x0, y0, x1, y1]
    rect_type: str  # either "label" or "entry"
    field: dict  # the form-field record from fields.json


def get_bounding_box_messages(fields_json_stream) -> list[str]:
    """Validate label/entry bounding boxes loaded from a fields.json stream.

    Reports pairwise intersections between boxes on the same page, and entry
    boxes too short for their text's font size. Aborts early once 20 messages
    have accumulated. Returns the messages for the caller to print to stdout.
    """
    msgs = []
    data = json.load(fields_json_stream)
    msgs.append(f"Read {len(data['form_fields'])} fields")

    def overlaps(a, b):
        # Boxes intersect unless fully separated on at least one axis.
        apart_x = a[0] >= b[2] or a[2] <= b[0]
        apart_y = a[1] >= b[3] or a[3] <= b[1]
        return not apart_x and not apart_y

    boxes = []
    for field in data["form_fields"]:
        boxes.append(RectAndField(field["label_bounding_box"], "label", field))
        boxes.append(RectAndField(field["entry_bounding_box"], "entry", field))

    failed = False
    for i, first in enumerate(boxes):
        # O(N^2) pairwise scan; acceptable for typical form sizes.
        for second in boxes[i + 1:]:
            same_page = first.field["page_number"] == second.field["page_number"]
            if same_page and overlaps(first.rect, second.rect):
                failed = True
                if first.field is second.field:
                    msgs.append(f"FAILURE: intersection between label and entry bounding boxes for `{first.field['description']}` ({first.rect}, {second.rect})")
                else:
                    msgs.append(f"FAILURE: intersection between {first.rect_type} bounding box for `{first.field['description']}` ({first.rect}) and {second.rect_type} bounding box for `{second.field['description']}` ({second.rect})")
                if len(msgs) >= 20:
                    msgs.append("Aborting further checks; fix bounding boxes and try again")
                    return msgs
        # Height check applies only to entry boxes that carry text content.
        if first.rect_type == "entry" and "entry_text" in first.field:
            font_size = first.field["entry_text"].get("font_size", 14)
            entry_height = first.rect[3] - first.rect[1]
            if entry_height < font_size:
                failed = True
                msgs.append(f"FAILURE: entry bounding box height ({entry_height}) for `{first.field['description']}` is too short for the text content (font size: {font_size}). Increase the box height or decrease the font size.")
                if len(msgs) >= 20:
                    msgs.append("Aborting further checks; fix bounding boxes and try again")
                    return msgs

    if not failed:
        msgs.append("SUCCESS: All bounding boxes are valid")
    return msgs
61 |
if __name__ == "__main__":
    # Expect exactly one argument: the path to a fields.json file.
    if len(sys.argv) != 2:
        print("Usage: check_bounding_boxes.py [fields.json]")
        sys.exit(1)
    # Input file should be in the `fields.json` format described in forms.md.
    with open(sys.argv[1]) as f:
        messages = get_bounding_box_messages(f)
    for msg in messages:
        print(msg)
71 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/mce/mc.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/mce/mc.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
--------------------------------------------------------------------------------
/scientific-skills/opentrons-integration/scripts/serial_dilution_template.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Serial Dilution Protocol Template
4 |
5 | This template demonstrates how to perform a serial dilution across a plate row.
6 | Useful for creating concentration gradients for assays.
7 | """
8 |
9 | from opentrons import protocol_api
10 |
# Metadata shown in the Opentrons App; apiLevel pins the Python API version.
metadata = {
    'protocolName': 'Serial Dilution Template',
    'author': 'Opentrons',
    'description': 'Serial dilution protocol for creating concentration gradients',
    'apiLevel': '2.19'
}

# Target robot model. NOTE(review): apiLevel is declared both here and in
# metadata above — the Opentrons API expects it in only one place; confirm
# which location this API level accepts.
requirements = {
    'robotType': 'Flex',
    'apiLevel': '2.19'
}
22 |
def run(protocol: protocol_api.ProtocolContext):
    """
    Performs a serial dilution across plate rows.

    Protocol performs:
    1. Adds diluent to all wells except the first column
    2. Transfers stock solution to first column
    3. Performs serial dilutions across rows

    Args:
        protocol: Context supplied by the Opentrons runtime; used to load
            labware/instruments and issue liquid-handling commands.
    """

    # Load labware
    tips = protocol.load_labware('opentrons_flex_96_tiprack_200ul', 'D1')
    reservoir = protocol.load_labware('nest_12_reservoir_15ml', 'D2', label='Reservoir')
    plate = protocol.load_labware('corning_96_wellplate_360ul_flat', 'D3', label='Dilution Plate')

    # Load pipette
    # NOTE(review): 'p300_single_flex' does not match the usual Flex
    # instrument naming (e.g. 'flex_1channel_...'); confirm against the
    # Opentrons instrument list for this API level.
    p300 = protocol.load_instrument('p300_single_flex', 'left', tip_racks=[tips])

    # Define liquids (optional, for visualization)
    diluent = protocol.define_liquid(
        name='Diluent',
        description='Buffer or growth media',
        display_color='#B0E0E6'
    )

    stock = protocol.define_liquid(
        name='Stock Solution',
        description='Concentrated stock',
        display_color='#FF6347'
    )

    # Load liquids into wells
    reservoir['A1'].load_liquid(liquid=diluent, volume=15000)
    reservoir['A2'].load_liquid(liquid=stock, volume=5000)

    # Protocol parameters
    dilution_factor = 2  # 1:2 dilution
    transfer_volume = 100  # µL
    num_dilutions = 11  # Number of dilution steps

    protocol.comment('Starting serial dilution protocol')

    # Step 1: Add diluent to all wells except first column
    # A single tip per row is reused for all diluent transfers ('once').
    protocol.comment('Adding diluent to wells...')
    for row in plate.rows()[:8]:  # For each row (A-H)
        p300.transfer(
            transfer_volume,
            reservoir['A1'],  # Diluent source
            row[1:],  # All wells except first (columns 2-12)
            new_tip='once'
        )

    # Step 2: Add stock solution to first column
    protocol.comment('Adding stock solution to first column...')
    p300.transfer(
        transfer_volume * 2,  # Double volume for first well
        reservoir['A2'],  # Stock source
        [row[0] for row in plate.rows()[:8]],  # First column (wells A1-H1)
        new_tip='always'
    )

    # Step 3: Perform serial dilution
    # Moves transfer_volume from each well into the next, mixing after each
    # transfer, which yields a 1:2 step between adjacent columns.
    protocol.comment('Performing serial dilutions...')
    for row in plate.rows()[:8]:  # For each row
        p300.transfer(
            transfer_volume,
            row[:num_dilutions],  # Source wells (1-11)
            row[1:num_dilutions + 1],  # Destination wells (2-12)
            mix_after=(3, 50),  # Mix 3x with 50µL after each transfer
            new_tip='always'
        )

    # NOTE(review): nothing is withdrawn from the final column, so column 12
    # ends at double volume (200 µL) relative to the others — confirm intended.
    protocol.comment('Serial dilution complete!')
    protocol.comment(f'Created {num_dilutions} dilutions with {dilution_factor}x dilution factor')
97 |
--------------------------------------------------------------------------------
/scientific-skills/clinical-reports/scripts/format_adverse_events.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Format adverse event data into tables for clinical trial reports.
4 |
5 | Converts CSV or structured data into formatted AE summary tables.
6 |
7 | Usage:
8 | python format_adverse_events.py
9 | """
10 |
11 | import argparse
12 | import csv
13 | from collections import defaultdict
14 | from pathlib import Path
15 |
16 |
def format_ae_summary_table(data: list) -> str:
    """Generate AE summary table in markdown format.

    Args:
        data: List of per-subject dicts with keys 'treatment_arm', 'any_ae',
            'related', 'serious', 'fatal', 'discontinuation' ("yes"/"no"
            strings, case-insensitive).

    Returns:
        A markdown table with one column per treatment arm (first-seen
        order); counts carry percentages of the arm total, except Total N.
    """
    # Tally per-arm counts.
    stats = defaultdict(lambda: {
        'total': 0,
        'any_ae': 0,
        'related_ae': 0,
        'sae': 0,
        'deaths': 0,
        'discontinuations': 0
    })

    # (CSV column, internal counter) pairs for the yes/no flag fields.
    flags = [
        ('any_ae', 'any_ae'),
        ('related', 'related_ae'),
        ('serious', 'sae'),
        ('fatal', 'deaths'),
        ('discontinuation', 'discontinuations'),
    ]
    for subject in data:
        counts = stats[subject.get('treatment_arm', 'Unknown')]
        counts['total'] += 1
        for column, key in flags:
            if subject.get(column, '').lower() == 'yes':
                counts[key] += 1

    # Header and separator rows: one column per treatment arm.
    lines = ["| Category | " + " | ".join(stats.keys()) + " |"]
    lines.append("|----------|" + "|".join(["--------"] * len(stats)) + "|")

    for label, key in [
        ('Total N', 'total'),
        ('Any AE', 'any_ae'),
        ('Treatment-related AE', 'related_ae'),
        ('Serious AE', 'sae'),
        ('Deaths', 'deaths'),
        ('Discontinuation due to AE', 'discontinuations'),
    ]:
        cells = [label]
        for counts in stats.values():
            n = counts[key]
            total = counts['total']
            if key == 'total':
                cells.append(f"{n}")
            else:
                pct = (n / total * 100) if total > 0 else 0
                cells.append(f"{n} ({pct:.1f}%)")
        lines.append("| " + " | ".join(cells) + " |")

    return "\n".join(lines) + "\n"
68 |
69 |
def main():
    """CLI entry point: read AE CSV, build the table, print or save it.

    Returns:
        Process exit code: 0 on success, 1 on any error.
    """
    parser = argparse.ArgumentParser(description="Format AE data into tables")
    parser.add_argument("input_file", help="Path to AE data CSV")
    parser.add_argument("--output", "-o", help="Output markdown file")
    args = parser.parse_args()

    try:
        with open(args.input_file, 'r') as f:
            rows = list(csv.DictReader(f))

        table = format_ae_summary_table(rows)

        if args.output:
            # Write the markdown table to the requested file.
            with open(args.output, 'w') as f:
                f.write(table)
            print(f"✓ Table saved to: {args.output}")
        else:
            print("\nAdverse Events Summary Table:\n")
            print(table)

        return 0
    except Exception as e:
        # CLI boundary: report the failure and exit non-zero.
        print(f"Error: {e}")
        return 1
98 |
99 |
if __name__ == "__main__":
    # Local import keeps `sys` out of module scope when imported as a library.
    import sys
    # Exit code comes from main(): 0 on success, 1 on error.
    sys.exit(main())
103 |
104 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd:
--------------------------------------------------------------------------------
1 |
2 |
7 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd:
--------------------------------------------------------------------------------
1 |
2 |
7 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
name: Create Release

# Cut a GitHub release whenever the marketplace version file changes on main,
# or when triggered manually from the Actions tab.
on:
  push:
    branches:
      - main
    paths:
      - '.claude-plugin/marketplace.json'
  workflow_dispatch:

# Needed so the workflow can create tags and releases via the GitHub API.
permissions:
  contents: write

jobs:
  release:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for release notes

      # Read the release version out of marketplace.json with jq.
      - name: Extract version from marketplace.json
        id: get_version
        run: |
          VERSION=$(jq -r '.metadata.version' .claude-plugin/marketplace.json)
          echo "version=$VERSION" >> $GITHUB_OUTPUT
          echo "tag=v$VERSION" >> $GITHUB_OUTPUT
          echo "Extracted version: $VERSION"

      # A pre-existing tag for this version means the release was already cut;
      # downstream steps are gated on exists == 'false'.
      - name: Check if tag already exists
        id: check_tag
        run: |
          if git rev-parse "v${{ steps.get_version.outputs.version }}" >/dev/null 2>&1; then
            echo "exists=true" >> $GITHUB_OUTPUT
            echo "Tag v${{ steps.get_version.outputs.version }} already exists"
          else
            echo "exists=false" >> $GITHUB_OUTPUT
            echo "Tag v${{ steps.get_version.outputs.version }} does not exist"
          fi

      - name: Get previous tag
        id: previous_tag
        if: steps.check_tag.outputs.exists == 'false'
        run: |
          PREVIOUS_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "")
          if [ -z "$PREVIOUS_TAG" ]; then
            echo "previous_tag=" >> $GITHUB_OUTPUT
            echo "No previous tag found"
          else
            echo "previous_tag=$PREVIOUS_TAG" >> $GITHUB_OUTPUT
            echo "Previous tag: $PREVIOUS_TAG"
          fi

      # Build release_notes.md: a commit log since the previous tag, or an
      # initial-release blurb (last 20 commits) when no previous tag exists.
      - name: Generate release notes
        id: release_notes
        if: steps.check_tag.outputs.exists == 'false'
        run: |
          PREVIOUS_TAG="${{ steps.previous_tag.outputs.previous_tag }}"

          # Start release notes
          cat > release_notes.md << 'EOF'
          ## What's Changed

          EOF

          # Generate changelog from commits
          if [ -n "$PREVIOUS_TAG" ]; then
            echo "Changes since $PREVIOUS_TAG:" >> release_notes.md
            echo "" >> release_notes.md

            # Get commits with nice formatting
            git log ${PREVIOUS_TAG}..HEAD --pretty=format:"* %s (%h)" --no-merges >> release_notes.md
          else
            echo "Initial release of Claude Scientific Skills" >> release_notes.md
            echo "" >> release_notes.md
            echo "This release includes:" >> release_notes.md
            git log --pretty=format:"* %s (%h)" --no-merges --max-count=20 >> release_notes.md
          fi

          cat release_notes.md

      - name: Create Release
        if: steps.check_tag.outputs.exists == 'false'
        uses: softprops/action-gh-release@v1
        with:
          tag_name: ${{ steps.get_version.outputs.tag }}
          name: v${{ steps.get_version.outputs.version }}
          body_path: release_notes.md
          draft: false
          prerelease: false
          generate_release_notes: false  # body_path supplies the notes instead
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Skip release creation
        if: steps.check_tag.outputs.exists == 'true'
        run: |
          echo "Release v${{ steps.get_version.outputs.version }} already exists. Skipping release creation."
101 |
102 |
--------------------------------------------------------------------------------
/scientific-skills/protocolsio-integration/references/authentication.md:
--------------------------------------------------------------------------------
1 | # Protocols.io Authentication
2 |
3 | ## Overview
4 |
5 | The protocols.io API supports two types of access tokens for authentication, enabling access to both public and private content.
6 |
7 | ## Access Token Types
8 |
9 | ### 1. CLIENT_ACCESS_TOKEN
10 |
11 | - **Purpose**: Enables access to public content and the private content of the client user
12 | - **Use case**: When accessing your own protocols and public protocols
13 | - **Scope**: Limited to the token owner's private content plus all public content
14 |
15 | ### 2. OAUTH_ACCESS_TOKEN
16 |
17 | - **Purpose**: Grants access to specific users' private content plus all public content
18 | - **Use case**: When building applications that need to access other users' content with their permission
19 | - **Scope**: Full access to authorized user's private content plus all public content
20 |
21 | ## Authentication Header
22 |
23 | All API requests must include an Authorization header:
24 |
25 | ```
26 | Authorization: Bearer [ACCESS_TOKEN]
27 | ```
28 |
29 | ## OAuth Flow
30 |
31 | ### Step 1: Generate Authorization Link
32 |
33 | Direct users to the authorization URL to grant access:
34 |
35 | ```
36 | GET https://protocols.io/api/v3/oauth/authorize
37 | ```
38 |
39 | **Parameters:**
40 | - `client_id` (required): Your application's client ID
41 | - `redirect_uri` (required): URL to redirect users after authorization
42 | - `response_type` (required): Set to "code"
43 | - `state` (optional but recommended): Random string to prevent CSRF attacks
44 |
45 | **Example:**
46 | ```
47 | https://protocols.io/api/v3/oauth/authorize?client_id=YOUR_CLIENT_ID&redirect_uri=YOUR_REDIRECT_URI&response_type=code&state=RANDOM_STRING
48 | ```
49 |
50 | ### Step 2: Exchange Authorization Code for Token
51 |
52 | After user authorization, protocols.io redirects to your `redirect_uri` with an authorization code. Exchange this code for an access token:
53 |
54 | ```
55 | POST https://protocols.io/api/v3/oauth/token
56 | ```
57 |
58 | **Parameters:**
59 | - `grant_type`: Set to "authorization_code"
60 | - `code`: The authorization code received
61 | - `client_id`: Your application's client ID
62 | - `client_secret`: Your application's client secret
63 | - `redirect_uri`: Must match the redirect_uri used in Step 1
64 |
65 | **Response includes:**
66 | - `access_token`: The OAuth access token to use for API requests
67 | - `token_type`: "Bearer"
68 | - `expires_in`: Token lifetime in seconds (typically 1 year)
69 | - `refresh_token`: Token for refreshing the access token
70 |
71 | ### Step 3: Refresh Access Token
72 |
73 | Before the access token expires (typically 1 year), use the refresh token to obtain a new access token:
74 |
75 | ```
76 | POST https://protocols.io/api/v3/oauth/token
77 | ```
78 |
79 | **Parameters:**
80 | - `grant_type`: Set to "refresh_token"
81 | - `refresh_token`: The refresh token received in Step 2
82 | - `client_id`: Your application's client ID
83 | - `client_secret`: Your application's client secret
84 |
85 | ## Rate Limits
86 |
87 | Be aware of rate limiting when making API requests:
88 |
89 | - **Standard endpoints**: 100 requests per minute per user
90 | - **PDF endpoint** (`/view/[protocol-uri].pdf`):
91 | - Signed-in users: 5 requests per minute
92 | - Unsigned users: 3 requests per minute
93 |
94 | ## Best Practices
95 |
96 | 1. **Store tokens securely**: Never expose access tokens in client-side code or version control
97 | 2. **Handle token expiration**: Implement automatic token refresh before expiration
98 | 3. **Respect rate limits**: Implement exponential backoff for rate limit errors
99 | 4. **Use state parameter**: Always include a state parameter in OAuth flow for security
100 | 5. **Validate redirect_uri**: Ensure redirect URIs match exactly between authorization and token requests
101 |
--------------------------------------------------------------------------------
/scientific-skills/gtars/references/refget.md:
--------------------------------------------------------------------------------
1 | # Reference Sequence Management
2 |
3 | The refget module handles reference sequence retrieval and digest computation, following the refget protocol for sequence identification.
4 |
5 | ## RefgetStore
6 |
7 | RefgetStore manages reference sequences and their digests:
8 |
9 | ```python
10 | import gtars
11 |
12 | # Create RefgetStore
13 | store = gtars.RefgetStore()
14 |
15 | # Add sequence
16 | store.add_sequence("chr1", sequence_data)
17 |
18 | # Retrieve sequence
19 | seq = store.get_sequence("chr1")
20 |
21 | # Get sequence digest
22 | digest = store.get_digest("chr1")
23 | ```
24 |
25 | ## Sequence Digests
26 |
27 | Compute and verify sequence digests:
28 |
29 | ```python
30 | # Compute digest for sequence
31 | from gtars.refget import compute_digest
32 |
33 | digest = compute_digest(sequence_data)
34 |
35 | # Verify digest matches
36 | is_valid = store.verify_digest("chr1", expected_digest)
37 | ```
38 |
39 | ## Integration with Reference Genomes
40 |
41 | Work with standard reference genomes:
42 |
43 | ```python
44 | # Load reference genome
45 | store = gtars.RefgetStore.from_fasta("hg38.fa")
46 |
47 | # Get chromosome sequences
48 | chr1 = store.get_sequence("chr1")
49 | chr2 = store.get_sequence("chr2")
50 |
51 | # Get subsequence
52 | region_seq = store.get_subsequence("chr1", 1000, 2000)
53 | ```
54 |
55 | ## CLI Usage
56 |
57 | Manage reference sequences from command line:
58 |
59 | ```bash
60 | # Compute digest for FASTA file
61 | gtars refget digest --input genome.fa --output digests.txt
62 |
63 | # Verify sequence digest
64 | gtars refget verify --sequence sequence.fa --digest expected_digest
65 | ```
66 |
67 | ## Refget Protocol Compliance
68 |
69 | The refget module follows the GA4GH refget protocol:
70 |
71 | ### Digest Computation
72 |
73 | Digests are computed with the GA4GH sha512t24u method: SHA-512 truncated to 24 bytes and base64url-encoded:
74 |
75 | ```python
76 | # Compute refget-compliant digest
77 | digest = gtars.refget.compute_digest(sequence)
78 | # Returns: "SQ.abc123..."
79 | ```
80 |
81 | ### Sequence Retrieval
82 |
83 | Retrieve sequences by digest:
84 |
85 | ```python
86 | # Get sequence by refget digest
87 | seq = store.get_sequence_by_digest("SQ.abc123...")
88 | ```
89 |
90 | ## Use Cases
91 |
92 | ### Reference Validation
93 |
94 | Verify reference genome integrity:
95 |
96 | ```python
97 | # Compute digests for reference
98 | store = gtars.RefgetStore.from_fasta("reference.fa")
99 | digests = {chrom: store.get_digest(chrom) for chrom in store.chromosomes}
100 |
101 | # Compare with expected digests
102 | for chrom, expected in expected_digests.items():
103 | actual = digests[chrom]
104 | if actual != expected:
105 | print(f"Mismatch for {chrom}: {actual} != {expected}")
106 | ```
107 |
108 | ### Sequence Extraction
109 |
110 | Extract specific genomic regions:
111 |
112 | ```python
113 | # Extract regions of interest
114 | store = gtars.RefgetStore.from_fasta("hg38.fa")
115 |
116 | regions = [
117 | ("chr1", 1000, 2000),
118 | ("chr2", 5000, 6000),
119 | ("chr3", 10000, 11000)
120 | ]
121 |
122 | sequences = [store.get_subsequence(c, s, e) for c, s, e in regions]
123 | ```
124 |
125 | ### Cross-Reference Comparison
126 |
127 | Compare sequences across different references:
128 |
129 | ```python
130 | # Load two reference versions
131 | hg19 = gtars.RefgetStore.from_fasta("hg19.fa")
132 | hg38 = gtars.RefgetStore.from_fasta("hg38.fa")
133 |
134 | # Compare digests
135 | for chrom in hg19.chromosomes:
136 | digest_19 = hg19.get_digest(chrom)
137 | digest_38 = hg38.get_digest(chrom)
138 | if digest_19 != digest_38:
139 | print(f"{chrom} differs between hg19 and hg38")
140 | ```
141 |
142 | ## Performance Notes
143 |
144 | - Sequences loaded on demand
145 | - Digests cached after computation
146 | - Efficient subsequence extraction
147 | - Memory-mapped file support for large genomes
148 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/microsoft/wml-2012.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pptx/ooxml/schemas/microsoft/wml-2012.xsd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/pdf/scripts/fill_pdf_form_with_annotations.py:
--------------------------------------------------------------------------------
1 | import json
2 | import sys
3 |
4 | from pypdf import PdfReader, PdfWriter
5 | from pypdf.annotations import FreeText
6 |
7 |
8 | # Fills a PDF by adding text annotations defined in `fields.json`. See forms.md.
9 |
10 |
def transform_coordinates(bbox, image_width, image_height, pdf_width, pdf_height):
    """Convert an image-space bounding box into PDF-space coordinates.

    Image coordinates place the origin at the top-left with y growing
    downward; PDF coordinates place the origin at the bottom-left with y
    growing upward, so the y axis is scaled and then flipped about the
    page height.

    Args:
        bbox: (left, top, right, bottom) in image pixels.
        image_width: Width of the rendered page image, in pixels.
        image_height: Height of the rendered page image, in pixels.
        pdf_width: Width of the PDF page, in points.
        pdf_height: Height of the PDF page, in points.

    Returns:
        Tuple (left, bottom, right, top) in PDF points.
    """
    sx = pdf_width / image_width
    sy = pdf_height / image_height
    x0, y0, x1, y1 = bbox
    # x scales directly; y is scaled then mirrored across the page height.
    return x0 * sx, pdf_height - y1 * sy, x1 * sx, pdf_height - y0 * sy
26 |
27 |
def fill_pdf_form(input_pdf_path, fields_json_path, output_pdf_path):
    """Fill a PDF form by overlaying FreeText annotations from fields.json.

    Reads field definitions (image-space bounding boxes plus entry text)
    from `fields_json_path`, converts each box to PDF coordinates, and
    writes a copy of the input PDF with one FreeText annotation per
    non-empty field.

    Args:
        input_pdf_path: Path to the source PDF.
        fields_json_path: Path to the fields JSON (format described in forms.md).
        output_pdf_path: Path where the annotated PDF is written.

    Raises:
        ValueError: If a form field references a page number with no
            matching entry in the JSON "pages" list.
    """
    # `fields.json` format described in forms.md.
    with open(fields_json_path, "r") as f:
        fields_data = json.load(f)

    # Open the PDF and copy all pages to the writer.
    reader = PdfReader(input_pdf_path)
    writer = PdfWriter()
    writer.append(reader)

    # PDF page dimensions keyed by 1-based page number.
    pdf_dimensions = {}
    for i, page in enumerate(reader.pages):
        mediabox = page.mediabox
        pdf_dimensions[i + 1] = [mediabox.width, mediabox.height]

    # Process each form field.
    annotations = []
    for field in fields_data["form_fields"]:
        # Skip empty fields up front, before doing any coordinate work.
        if "entry_text" not in field or "text" not in field["entry_text"]:
            continue
        entry_text = field["entry_text"]
        text = entry_text["text"]
        if not text:
            continue

        page_num = field["page_number"]

        # Look up the page's image dimensions; fail with a clear message
        # instead of an opaque StopIteration if the metadata is missing.
        page_info = next(
            (p for p in fields_data["pages"] if p["page_number"] == page_num),
            None,
        )
        if page_info is None:
            raise ValueError(
                f"No page metadata for page {page_num} in {fields_json_path}"
            )
        image_width = page_info["image_width"]
        image_height = page_info["image_height"]
        pdf_width, pdf_height = pdf_dimensions[page_num]

        transformed_entry_box = transform_coordinates(
            field["entry_bounding_box"],
            image_width, image_height,
            pdf_width, pdf_height
        )

        font_name = entry_text.get("font", "Arial")
        font_size = str(entry_text.get("font_size", 14)) + "pt"
        font_color = entry_text.get("font_color", "000000")

        # Font size/color seems to not work reliably across viewers:
        # https://github.com/py-pdf/pypdf/issues/2084
        annotation = FreeText(
            text=text,
            rect=transformed_entry_box,
            font=font_name,
            font_size=font_size,
            font_color=font_color,
            border_color=None,
            background_color=None,
        )
        annotations.append(annotation)
        # page_number is 0-based for pypdf
        writer.add_annotation(page_number=page_num - 1, annotation=annotation)

    # Save the filled PDF.
    with open(output_pdf_path, "wb") as output:
        writer.write(output)

    print(f"Successfully filled PDF form and saved to {output_pdf_path}")
    print(f"Added {len(annotations)} text annotations")
98 |
99 |
if __name__ == "__main__":
    # Require exactly three positional arguments.
    if len(sys.argv) != 4:
        print("Usage: fill_pdf_form_with_annotations.py [input pdf] [fields.json] [output pdf]")
        sys.exit(1)
    _, input_pdf, fields_json, output_pdf = sys.argv
    fill_pdf_form(input_pdf, fields_json, output_pdf)
--------------------------------------------------------------------------------
/scientific-skills/adaptyv/SKILL.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: adaptyv
3 | description: Cloud laboratory platform for automated protein testing and validation. Use when designing proteins and needing experimental validation including binding assays, expression testing, thermostability measurements, enzyme activity assays, or protein sequence optimization. Also use for submitting experiments via API, tracking experiment status, downloading results, optimizing protein sequences for better expression using computational tools (NetSolP, SoluProt, SolubleMPNN, ESM), or managing protein design workflows with wet-lab validation.
4 | ---
5 |
6 | # Adaptyv
7 |
8 | Adaptyv is a cloud laboratory platform that provides automated protein testing and validation services. Submit protein sequences via API or web interface and receive experimental results in approximately 21 days.
9 |
10 | ## Quick Start
11 |
12 | ### Authentication Setup
13 |
14 | Adaptyv requires API authentication. Set up your credentials:
15 |
16 | 1. Contact support@adaptyvbio.com to request API access (platform is in alpha/beta)
17 | 2. Receive your API access token
18 | 3. Set environment variable:
19 |
20 | ```bash
21 | export ADAPTYV_API_KEY="your_api_key_here"
22 | ```
23 |
24 | Or create a `.env` file:
25 |
26 | ```
27 | ADAPTYV_API_KEY=your_api_key_here
28 | ```
29 |
30 | ### Installation
31 |
32 | Install the required package using uv:
33 |
34 | ```bash
35 | uv pip install requests python-dotenv
36 | ```
37 |
38 | ### Basic Usage
39 |
40 | Submit protein sequences for testing:
41 |
42 | ```python
43 | import os
44 | import requests
45 | from dotenv import load_dotenv
46 |
47 | load_dotenv()
48 |
49 | api_key = os.getenv("ADAPTYV_API_KEY")
50 | base_url = "https://kq5jp7qj7wdqklhsxmovkzn4l40obksv.lambda-url.eu-central-1.on.aws"
51 |
52 | headers = {
53 | "Authorization": f"Bearer {api_key}",
54 | "Content-Type": "application/json"
55 | }
56 |
57 | # Submit experiment
58 | response = requests.post(
59 | f"{base_url}/experiments",
60 | headers=headers,
61 | json={
62 | "sequences": ">protein1\nMKVLWALLGLLGAA...",
63 | "experiment_type": "binding",
64 | "webhook_url": "https://your-webhook.com/callback"
65 | }
66 | )
67 |
68 | experiment_id = response.json()["experiment_id"]
69 | ```
70 |
71 | ## Available Experiment Types
72 |
73 | Adaptyv supports multiple assay types:
74 |
75 | - **Binding assays** - Test protein-target interactions using biolayer interferometry
76 | - **Expression testing** - Measure protein expression levels
77 | - **Thermostability** - Characterize protein thermal stability
78 | - **Enzyme activity** - Assess enzymatic function
79 |
80 | See `reference/experiments.md` for detailed information on each experiment type and workflows.
81 |
82 | ## Protein Sequence Optimization
83 |
84 | Before submitting sequences, optimize them for better expression and stability:
85 |
86 | **Common issues to address:**
87 | - Unpaired cysteines that create unwanted disulfides
88 | - Excessive hydrophobic regions causing aggregation
89 | - Poor solubility predictions
90 |
91 | **Recommended tools:**
92 | - NetSolP / SoluProt - Initial solubility filtering
93 | - SolubleMPNN - Sequence redesign for improved solubility
94 | - ESM - Sequence likelihood scoring
95 | - ipTM - Interface stability assessment
96 | - pSAE - Hydrophobic exposure quantification
97 |
98 | See `reference/protein_optimization.md` for detailed optimization workflows and tool usage.
99 |
100 | ## API Reference
101 |
102 | For complete API documentation including all endpoints, request/response formats, and authentication details, see `reference/api_reference.md`.
103 |
104 | ## Examples
105 |
106 | For concrete code examples covering common use cases (experiment submission, status tracking, result retrieval, batch processing), see `reference/examples.md`.
107 |
108 | ## Important Notes
109 |
110 | - Platform is currently in alpha/beta phase with features subject to change
111 | - Not all platform features are available via API yet
112 | - Results typically delivered in ~21 days
113 | - Contact support@adaptyvbio.com for access requests or questions
114 | - Suitable for high-throughput AI-driven protein design workflows
115 |
--------------------------------------------------------------------------------
/scientific-skills/deeptools/references/effective_genome_sizes.md:
--------------------------------------------------------------------------------
1 | # Effective Genome Sizes
2 |
3 | ## Definition
4 |
5 | Effective genome size refers to the length of the "mappable" genome - regions that can be uniquely mapped by sequencing reads. This metric is crucial for proper normalization in many deepTools commands.
6 |
7 | ## Why It Matters
8 |
9 | - Required for RPGC normalization (`--normalizeUsing RPGC`)
10 | - Affects accuracy of coverage calculations
11 | - Must match your data processing approach (filtered vs unfiltered reads)
12 |
13 | ## Calculation Methods
14 |
15 | 1. **Non-N bases**: Count of non-N nucleotides in genome sequence
16 | 2. **Unique mappability**: Regions of specific size that can be uniquely mapped (may consider edit distance)
17 |
18 | ## Common Organism Values
19 |
20 | ### Using Non-N Bases Method
21 |
22 | | Organism | Assembly | Effective Size | Full Command |
23 | |----------|----------|----------------|--------------|
24 | | Human | GRCh38/hg38 | 2,913,022,398 | `--effectiveGenomeSize 2913022398` |
25 | | Human | GRCh37/hg19 | 2,864,785,220 | `--effectiveGenomeSize 2864785220` |
26 | | Mouse | GRCm39/mm39 | 2,654,621,837 | `--effectiveGenomeSize 2654621837` |
27 | | Mouse | GRCm38/mm10 | 2,652,783,500 | `--effectiveGenomeSize 2652783500` |
28 | | Zebrafish | GRCz11 | 1,368,780,147 | `--effectiveGenomeSize 1368780147` |
29 | | *Drosophila* | dm6 | 142,573,017 | `--effectiveGenomeSize 142573017` |
30 | | *C. elegans* | WBcel235/ce11 | 100,286,401 | `--effectiveGenomeSize 100286401` |
31 | | *C. elegans* | ce10 | 100,258,171 | `--effectiveGenomeSize 100258171` |
32 |
33 | ### Human (GRCh38) by Read Length
34 |
35 | For quality-filtered reads, values vary by read length:
36 |
37 | | Read Length | Effective Size |
38 | |-------------|----------------|
39 | | 50bp | ~2.7 billion |
40 | | 75bp | ~2.8 billion |
41 | | 100bp | ~2.8 billion |
42 | | 150bp | ~2.9 billion |
43 | | 250bp | ~2.9 billion |
44 |
45 | ### Mouse (GRCm38) by Read Length
46 |
47 | | Read Length | Effective Size |
48 | |-------------|----------------|
49 | | 50bp | ~2.3 billion |
50 | | 75bp | ~2.5 billion |
51 | | 100bp | ~2.6 billion |
52 |
53 | ## Usage in deepTools
54 |
55 | The effective genome size is most commonly used with:
56 |
57 | ### bamCoverage with RPGC normalization
58 | ```bash
59 | bamCoverage --bam input.bam --outFileName output.bw \
60 | --normalizeUsing RPGC \
61 | --effectiveGenomeSize 2913022398
62 | ```
63 |
64 | ### bamCompare with RPGC normalization
65 | ```bash
66 | bamCompare -b1 treatment.bam -b2 control.bam \
67 | --outFileName comparison.bw \
68 | --scaleFactorsMethod RPGC \
69 | --effectiveGenomeSize 2913022398
70 | ```
71 |
72 | ### computeGCBias / correctGCBias
73 | ```bash
74 | computeGCBias --bamfile input.bam \
75 | --effectiveGenomeSize 2913022398 \
76 | --genome genome.2bit \
77 | --fragmentLength 200 \
78 | --biasPlot bias.png
79 | ```
80 |
81 | ## Choosing the Right Value
82 |
83 | **For most analyses:** Use the non-N bases method value for your reference genome
84 |
85 | **For filtered data:** If you apply strict quality filters or remove multimapping reads, consider using the read-length-specific values
86 |
87 | **When unsure:** Use the conservative non-N bases value - it's more widely applicable
88 |
89 | ## Common Shortcuts
90 |
91 | deepTools also accepts these shorthand values in some contexts:
92 |
93 | - `hs` or `GRCh38`: 2913022398
94 | - `mm` or `GRCm38`: 2652783500
95 | - `dm` or `dm6`: 142573017
96 | - `ce` or `ce11`: 100286401
97 |
98 | Check your specific deepTools version documentation for supported shortcuts.
99 |
100 | ## Calculating Custom Values
101 |
102 | For custom genomes or assemblies, calculate the non-N bases count:
103 |
104 | ```bash
105 | # Using faCount (UCSC tools)
106 | faCount genome.fa | grep "total" | awk '{print $2-$7}'
107 |
108 | # Using seqtk
109 | seqtk comp genome.fa | awk '{x+=$2}END{print x}'
110 | ```
111 |
112 | ## References
113 |
114 | For the most up-to-date effective genome sizes and detailed calculation methods, see:
115 | - deepTools documentation: https://deeptools.readthedocs.io/en/latest/content/feature/effectiveGenomeSize.html
116 | - ENCODE documentation for reference genome details
117 |
--------------------------------------------------------------------------------
/scientific-skills/paper-2-web/references/installation.md:
--------------------------------------------------------------------------------
1 | # Installation and Configuration
2 |
3 | ## System Requirements
4 |
5 | ### Hardware Requirements
6 | - **GPU**: NVIDIA A6000 (48GB minimum) required for video generation with talking-head features
7 | - **CPU**: Multi-core processor recommended for PDF processing and document conversion
8 | - **RAM**: 16GB minimum, 32GB recommended for large papers
9 |
10 | ### Software Requirements
11 | - **Python**: 3.11 or higher
12 | - **Conda**: Environment manager for dependency isolation
13 | - **LibreOffice**: Required for document format conversion (PDF to PPTX, etc.)
14 | - **Poppler utilities**: Required for PDF processing and manipulation
15 |
16 | ## Installation Steps
17 |
18 | ### 1. Clone the Repository
19 | ```bash
20 | git clone https://github.com/YuhangChen1/Paper2All.git
21 | cd Paper2All
22 | ```
23 |
24 | ### 2. Create Conda Environment
25 | ```bash
26 | conda create -n paper2all python=3.11
27 | conda activate paper2all
28 | ```
29 |
30 | ### 3. Install Dependencies
31 | ```bash
32 | pip install -r requirements.txt
33 | ```
34 |
35 | ### 4. Install System Dependencies
36 |
37 | **Ubuntu/Debian:**
38 | ```bash
39 | sudo apt-get install libreoffice poppler-utils
40 | ```
41 |
42 | **macOS:**
43 | ```bash
44 | brew install --cask libreoffice && brew install poppler
45 | ```
46 |
47 | **Windows:**
48 | - Download and install LibreOffice from https://www.libreoffice.org/
49 | - Download and install Poppler from https://github.com/oschwartz10612/poppler-windows
50 |
51 | ## API Configuration
52 |
53 | Create a `.env` file in the project root with the following credentials:
54 |
55 | ### Required API Keys
56 |
57 | **Option 1: OpenAI API**
58 | ```
59 | OPENAI_API_KEY=your_openai_api_key_here
60 | ```
61 |
62 | **Option 2: OpenRouter API** (alternative to OpenAI)
63 | ```
64 | OPENROUTER_API_KEY=your_openrouter_api_key_here
65 | ```
66 |
67 | ### Optional API Keys
68 |
69 | **Google Search API** (for automatic logo discovery)
70 | ```
71 | GOOGLE_API_KEY=your_google_api_key_here
72 | GOOGLE_CSE_ID=your_custom_search_engine_id_here
73 | ```
74 |
75 | ## Model Configuration
76 |
77 | The system supports multiple LLM backends:
78 |
79 | ### Supported Models
80 | - GPT-4 (recommended for best quality)
81 | - GPT-4.1 (latest version)
82 | - GPT-3.5-turbo (faster, lower cost)
83 | - Claude models via OpenRouter
84 | - Other OpenRouter-supported models
85 |
86 | ### Model Selection
87 |
88 | Specify models using the `--model-choice` parameter or `--model_name_t` and `--model_name_v` parameters:
89 | - Model choice 1: GPT-4 for all components
90 | - Model choice 2: GPT-4.1 for all components
91 | - Custom: Specify separate models for text and visual processing
92 |
93 | ## Verification
94 |
95 | Test the installation:
96 |
97 | ```bash
98 | python pipeline_all.py --help
99 | ```
100 |
101 | If successful, you should see the help menu with all available options.
102 |
103 | ## Troubleshooting
104 |
105 | ### Common Issues
106 |
107 | **1. LibreOffice not found**
108 | - Ensure LibreOffice is installed and in your system PATH
109 | - Try running `libreoffice --version` to verify
110 |
111 | **2. Poppler utilities not found**
112 | - Verify installation with `pdftoppm -v`
113 | - Add Poppler bin directory to PATH if needed
114 |
115 | **3. GPU/CUDA errors for video generation**
116 | - Ensure NVIDIA drivers are up to date
117 | - Verify CUDA toolkit is installed
118 | - Check GPU memory with `nvidia-smi`
119 |
120 | **4. API key errors**
121 | - Verify `.env` file is in the project root
122 | - Check that API keys are valid and have sufficient credits
123 | - Ensure no extra spaces or quotes around keys in `.env`
124 |
125 | ## Directory Structure
126 |
127 | After installation, organize your workspace:
128 |
129 | ```
130 | Paper2All/
131 | ├── .env # API credentials
132 | ├── input/ # Place your paper files here
133 | │ └── paper_name/ # Each paper in its own directory
134 | │ └── main.tex # LaTeX source or PDF
135 | ├── output/ # Generated outputs
136 | │ └── paper_name/
137 | │ ├── website/ # Generated website files
138 | │ ├── video/ # Generated video files
139 | │ └── poster/ # Generated poster files
140 | └── ...
141 | ```
142 |
--------------------------------------------------------------------------------
/scientific-skills/aeon/references/clustering.md:
--------------------------------------------------------------------------------
1 | # Time Series Clustering
2 |
3 | Aeon provides clustering algorithms adapted for temporal data with specialized distance metrics and averaging methods.
4 |
5 | ## Partitioning Algorithms
6 |
7 | Standard k-means/k-medoids adapted for time series:
8 |
9 | - `TimeSeriesKMeans` - K-means with temporal distance metrics (DTW, Euclidean, etc.)
10 | - `TimeSeriesKMedoids` - Uses actual time series as cluster centers
11 | - `TimeSeriesKShape` - Shape-based clustering algorithm
12 | - `TimeSeriesKernelKMeans` - Kernel-based variant for nonlinear patterns
13 |
14 | **Use when**: Known number of clusters, spherical cluster shapes expected.
15 |
16 | ## Large Dataset Methods
17 |
18 | Efficient clustering for large collections:
19 |
20 | - `TimeSeriesCLARA` - Clustering Large Applications with sampling
21 | - `TimeSeriesCLARANS` - Randomized search variant of CLARA
22 |
23 | **Use when**: Dataset too large for standard k-medoids, need scalability.
24 |
25 | ## Elastic Distance Clustering
26 |
27 | Specialized for alignment-based similarity:
28 |
29 | - `KASBA` - K-means with shift-invariant elastic averaging
30 | - `ElasticSOM` - Self-organizing map using elastic distances
31 |
32 | **Use when**: Time series have temporal shifts or warping.
33 |
34 | ## Spectral Methods
35 |
36 | Graph-based clustering:
37 |
38 | - `KSpectralCentroid` - Spectral clustering with centroid computation
39 |
40 | **Use when**: Non-convex cluster shapes, need graph-based approach.
41 |
42 | ## Deep Learning Clustering
43 |
44 | Neural network-based clustering with auto-encoders:
45 |
46 | - `AEFCNClusterer` - Fully convolutional auto-encoder
47 | - `AEResNetClusterer` - Residual network auto-encoder
48 | - `AEDCNNClusterer` - Dilated CNN auto-encoder
49 | - `AEDRNNClusterer` - Dilated RNN auto-encoder
50 | - `AEBiGRUClusterer` - Bidirectional GRU auto-encoder
51 | - `AEAttentionBiGRUClusterer` - Attention-enhanced BiGRU auto-encoder
52 |
53 | **Use when**: Large datasets, need learned representations, or complex patterns.
54 |
55 | ## Feature-Based Clustering
56 |
57 | Transform to feature space before clustering:
58 |
59 | - `Catch22Clusterer` - Clusters on 22 canonical features
60 | - `SummaryClusterer` - Uses summary statistics
61 | - `TSFreshClusterer` - Automated tsfresh features
62 |
63 | **Use when**: Raw time series not informative, need interpretable features.
64 |
65 | ## Composition
66 |
67 | Build custom clustering pipelines:
68 |
69 | - `ClustererPipeline` - Chain transformers with clusterers
70 |
71 | ## Averaging Methods
72 |
73 | Compute cluster centers for time series:
74 |
75 | - `mean_average` - Arithmetic mean
76 | - `ba_average` - Barycentric averaging with DTW
77 | - `kasba_average` - Shift-invariant averaging
78 | - `shift_invariant_average` - General shift-invariant method
79 |
80 | **Use when**: Need representative cluster centers for visualization or initialization.
81 |
82 | ## Quick Start
83 |
84 | ```python
85 | from aeon.clustering import TimeSeriesKMeans
86 | from aeon.datasets import load_classification
87 |
88 | # Load data (using classification data for clustering)
89 | X_train, _ = load_classification("GunPoint", split="train")
90 |
91 | # Cluster time series
92 | clusterer = TimeSeriesKMeans(
93 | n_clusters=3,
94 | distance="dtw", # Use DTW distance
95 | averaging_method="ba" # Barycentric averaging
96 | )
97 | labels = clusterer.fit_predict(X_train)
98 | centers = clusterer.cluster_centers_
99 | ```
100 |
101 | ## Algorithm Selection
102 |
103 | - **Speed priority**: TimeSeriesKMeans with Euclidean distance
104 | - **Temporal alignment**: KASBA, TimeSeriesKMeans with DTW
105 | - **Large datasets**: TimeSeriesCLARA, TimeSeriesCLARANS
106 | - **Complex patterns**: Deep learning clusterers
107 | - **Interpretability**: Catch22Clusterer, SummaryClusterer
108 | - **Non-convex clusters**: KSpectralCentroid
109 |
110 | ## Distance Metrics
111 |
112 | Compatible distance metrics include:
113 | - Euclidean, Manhattan, Minkowski (lock-step)
114 | - DTW, DDTW, WDTW (elastic with alignment)
115 | - ERP, EDR, LCSS (edit-based)
116 | - MSM, TWE (specialized elastic)
117 |
118 | ## Evaluation
119 |
120 | Use clustering metrics from sklearn or aeon benchmarking:
121 | - Silhouette score
122 | - Davies-Bouldin index
123 | - Calinski-Harabasz index
124 |
--------------------------------------------------------------------------------
/scientific-skills/document-skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
--------------------------------------------------------------------------------