├── .config └── dotnet-tools.json ├── .fantomasignore ├── .github └── workflows │ └── dotnet.yml ├── .gitignore ├── BioProviders.TestsAndDocs.sln ├── BioProviders.sln ├── DataFileGenerator.fsx ├── Directory.Build.props ├── LICENSE.md ├── README.md ├── RELEASE_NOTES.md ├── build.cmd ├── build.fsx ├── build.fsx.lock ├── build.sh ├── build └── data │ ├── genbank-assemblies-#.txt.gz │ ├── genbank-assemblies-a.txt.gz │ ├── genbank-assemblies-b.txt.gz │ ├── genbank-assemblies-c.txt.gz │ ├── genbank-assemblies-d.txt.gz │ ├── genbank-assemblies-e.txt.gz │ ├── genbank-assemblies-f.txt.gz │ ├── genbank-assemblies-g.txt.gz │ ├── genbank-assemblies-h.txt.gz │ ├── genbank-assemblies-i.txt.gz │ ├── genbank-assemblies-j.txt.gz │ ├── genbank-assemblies-k.txt.gz │ ├── genbank-assemblies-l.txt.gz │ ├── genbank-assemblies-m.txt.gz │ ├── genbank-assemblies-n.txt.gz │ ├── genbank-assemblies-o.txt.gz │ ├── genbank-assemblies-p.txt.gz │ ├── genbank-assemblies-q.txt.gz │ ├── genbank-assemblies-r.txt.gz │ ├── genbank-assemblies-s.txt.gz │ ├── genbank-assemblies-t.txt.gz │ ├── genbank-assemblies-u.txt.gz │ ├── genbank-assemblies-v.txt.gz │ ├── genbank-assemblies-w.txt.gz │ ├── genbank-assemblies-x.txt.gz │ ├── genbank-assemblies-y.txt.gz │ ├── genbank-assemblies-z.txt.gz │ ├── genbank-species-#.txt.gz │ ├── genbank-species-a.txt.gz │ ├── genbank-species-b.txt.gz │ ├── genbank-species-c.txt.gz │ ├── genbank-species-d.txt.gz │ ├── genbank-species-e.txt.gz │ ├── genbank-species-f.txt.gz │ ├── genbank-species-g.txt.gz │ ├── genbank-species-h.txt.gz │ ├── genbank-species-i.txt.gz │ ├── genbank-species-j.txt.gz │ ├── genbank-species-k.txt.gz │ ├── genbank-species-l.txt.gz │ ├── genbank-species-m.txt.gz │ ├── genbank-species-n.txt.gz │ ├── genbank-species-o.txt.gz │ ├── genbank-species-p.txt.gz │ ├── genbank-species-q.txt.gz │ ├── genbank-species-r.txt.gz │ ├── genbank-species-s.txt.gz │ ├── genbank-species-t.txt.gz │ ├── genbank-species-u.txt.gz │ ├── genbank-species-v.txt.gz │ ├── 
genbank-species-w.txt.gz │ ├── genbank-species-x.txt.gz │ ├── genbank-species-y.txt.gz │ ├── genbank-species-z.txt.gz │ ├── refseq-assemblies-#.txt.gz │ ├── refseq-assemblies-a.txt.gz │ ├── refseq-assemblies-b.txt.gz │ ├── refseq-assemblies-c.txt.gz │ ├── refseq-assemblies-d.txt.gz │ ├── refseq-assemblies-e.txt.gz │ ├── refseq-assemblies-f.txt.gz │ ├── refseq-assemblies-g.txt.gz │ ├── refseq-assemblies-h.txt.gz │ ├── refseq-assemblies-i.txt.gz │ ├── refseq-assemblies-j.txt.gz │ ├── refseq-assemblies-k.txt.gz │ ├── refseq-assemblies-l.txt.gz │ ├── refseq-assemblies-m.txt.gz │ ├── refseq-assemblies-n.txt.gz │ ├── refseq-assemblies-o.txt.gz │ ├── refseq-assemblies-p.txt.gz │ ├── refseq-assemblies-q.txt.gz │ ├── refseq-assemblies-r.txt.gz │ ├── refseq-assemblies-s.txt.gz │ ├── refseq-assemblies-t.txt.gz │ ├── refseq-assemblies-u.txt.gz │ ├── refseq-assemblies-v.txt.gz │ ├── refseq-assemblies-w.txt.gz │ ├── refseq-assemblies-x.txt.gz │ ├── refseq-assemblies-y.txt.gz │ ├── refseq-assemblies-z.txt.gz │ ├── refseq-species-#.txt.gz │ ├── refseq-species-a.txt.gz │ ├── refseq-species-b.txt.gz │ ├── refseq-species-c.txt.gz │ ├── refseq-species-d.txt.gz │ ├── refseq-species-e.txt.gz │ ├── refseq-species-f.txt.gz │ ├── refseq-species-g.txt.gz │ ├── refseq-species-h.txt.gz │ ├── refseq-species-i.txt.gz │ ├── refseq-species-j.txt.gz │ ├── refseq-species-k.txt.gz │ ├── refseq-species-l.txt.gz │ ├── refseq-species-m.txt.gz │ ├── refseq-species-n.txt.gz │ ├── refseq-species-o.txt.gz │ ├── refseq-species-p.txt.gz │ ├── refseq-species-q.txt.gz │ ├── refseq-species-r.txt.gz │ ├── refseq-species-s.txt.gz │ ├── refseq-species-t.txt.gz │ ├── refseq-species-u.txt.gz │ ├── refseq-species-v.txt.gz │ ├── refseq-species-w.txt.gz │ ├── refseq-species-x.txt.gz │ ├── refseq-species-y.txt.gz │ └── refseq-species-z.txt.gz ├── docs ├── _template.fsx ├── _template.html ├── _template.ipynb ├── img │ ├── GenBankProvider.gif │ ├── GenBank_Info.gif │ ├── RefSeq_Info.gif │ ├── badge-notebook.svg │ ├── 
badge-script.svg │ ├── logo.pdn │ └── logo.png ├── index.md └── library │ ├── GenBankProvider.fsx │ └── RefSeqProvider.fsx ├── global.json ├── paket.dependencies ├── paket.lock ├── src ├── DesignTime │ ├── Common.fs │ ├── DesignTime.fs │ ├── DesignTime.fsproj │ ├── TypeGenerator.fs │ ├── paket.references │ └── remote.txt └── RunTime │ ├── GenBankAssembly.fs │ ├── GenBankFlatFile.fs │ ├── GenBankMetadata.fs │ ├── GenBankSequence.fs │ ├── GenBankSpecies.fs │ ├── GenBankTaxon.fs │ ├── RunTime.fs │ ├── RunTime.fsproj │ └── paket.references └── tests ├── ProviderTests ├── Program.fs ├── ProviderTests.fs ├── ProviderTests.fsproj ├── ProviderTests.fsx └── paket.references └── Tests ├── ContextTests.fs ├── Data.fs ├── Program.fs ├── Tests.fsproj └── paket.references /.config/dotnet-tools.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 1, 3 | "isRoot": true, 4 | "tools": { 5 | "paket": { 6 | "version": "6.2.1", 7 | "commands": [ 8 | "paket" 9 | ] 10 | }, 11 | "fsdocs-tool": { 12 | "version": "19.1.1", 13 | "commands": [ 14 | "fsdocs" 15 | ] 16 | }, 17 | "fake-cli": { 18 | "version": "5.23.1", 19 | "commands": [ 20 | "fake" 21 | ] 22 | }, 23 | "fantomas": { 24 | "version": "5.2.0", 25 | "commands": [ 26 | "fantomas" 27 | ] 28 | } 29 | } 30 | } -------------------------------------------------------------------------------- /.fantomasignore: -------------------------------------------------------------------------------- 1 | paket-files/ 2 | tests/ -------------------------------------------------------------------------------- /.github/workflows/dotnet.yml: -------------------------------------------------------------------------------- 1 | name: .NET 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build-windows: 11 | 12 | runs-on: windows-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: Setup .NET 17 | uses: actions/setup-dotnet@v2 18 | 
with: 19 | dotnet-version: 6.0.401 20 | - name: Restore tools 21 | run: dotnet tool restore 22 | - name: Restore packages 23 | run: dotnet paket restore 24 | - name: Build and test 25 | run: dotnet fake build -t All 26 | - name: Deploy documentation 27 | uses: peaceiris/actions-gh-pages@v3 28 | with: 29 | personal_token: ${{ secrets.GITHUB_TOKEN }} 30 | publish_dir: ./output 31 | publish_branch: gh-pages 32 | force_orphan: true 33 | - name: Publish NuGets (if this version not published before) 34 | run: dotnet nuget push bin\BioProviders.*.nupkg -s https://api.nuget.org/v3/index.json -k ${{ secrets.NUGET_ORG_TOKEN_2023 }} --skip-duplicate 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Aa][Rr][Mm]/ 27 | [Aa][Rr][Mm]64/ 28 | bld/ 29 | [Bb]in/ 30 | [Oo]bj/ 31 | [Ll]og/ 32 | [Ll]ogs/ 33 | 34 | # Visual Studio 2015/2017 cache/options directory 35 | .vs/ 36 | # Uncomment if you have tasks that create the project's static files in wwwroot 37 | #wwwroot/ 38 | 39 | # Visual Studio 2017 auto generated files 40 | Generated\ Files/ 41 | 42 | # MSTest test Results 43 | [Tt]est[Rr]esult*/ 44 | [Bb]uild[Ll]og.* 45 | 46 | # NUnit 47 | *.VisualState.xml 48 | TestResult.xml 49 | nunit-*.xml 50 | 51 | # Build Results of an ATL Project 52 | [Dd]ebugPS/ 53 | [Rr]eleasePS/ 54 | 
dlldata.c 55 | 56 | # Benchmark Results 57 | BenchmarkDotNet.Artifacts/ 58 | 59 | # .NET Core 60 | project.lock.json 61 | project.fragment.lock.json 62 | artifacts/ 63 | 64 | # StyleCop 65 | StyleCopReport.xml 66 | 67 | # Files built by Visual Studio 68 | *_i.c 69 | *_p.c 70 | *_h.h 71 | *.ilk 72 | *.meta 73 | *.obj 74 | *.iobj 75 | *.pch 76 | *.pdb 77 | *.ipdb 78 | *.pgc 79 | *.pgd 80 | *.rsp 81 | *.sbr 82 | *.tlb 83 | *.tli 84 | *.tlh 85 | *.tmp 86 | *.tmp_proj 87 | *_wpftmp.csproj 88 | *.log 89 | *.vspscc 90 | *.vssscc 91 | .builds 92 | *.pidb 93 | *.svclog 94 | *.scc 95 | 96 | # Chutzpah Test files 97 | _Chutzpah* 98 | 99 | # Visual C++ cache files 100 | ipch/ 101 | *.aps 102 | *.ncb 103 | *.opendb 104 | *.opensdf 105 | *.sdf 106 | *.cachefile 107 | *.VC.db 108 | *.VC.VC.opendb 109 | 110 | # Visual Studio profiler 111 | *.psess 112 | *.vsp 113 | *.vspx 114 | *.sap 115 | 116 | # Visual Studio Trace Files 117 | *.e2e 118 | 119 | # TFS 2012 Local Workspace 120 | $tf/ 121 | 122 | # Guidance Automation Toolkit 123 | *.gpState 124 | 125 | # ReSharper is a .NET coding add-in 126 | _ReSharper*/ 127 | *.[Rr]e[Ss]harper 128 | *.DotSettings.user 129 | 130 | # TeamCity is a build add-in 131 | _TeamCity* 132 | 133 | # DotCover is a Code Coverage Tool 134 | *.dotCover 135 | 136 | # AxoCover is a Code Coverage Tool 137 | .axoCover/* 138 | !.axoCover/settings.json 139 | 140 | # Visual Studio code coverage results 141 | *.coverage 142 | *.coveragexml 143 | 144 | # NCrunch 145 | _NCrunch_* 146 | .*crunch*.local.xml 147 | nCrunchTemp_* 148 | 149 | # MightyMoose 150 | *.mm.* 151 | AutoTest.Net/ 152 | 153 | # Web workbench (sass) 154 | .sass-cache/ 155 | 156 | # Installshield output folder 157 | [Ee]xpress/ 158 | 159 | # DocProject is a documentation generator add-in 160 | DocProject/buildhelp/ 161 | DocProject/Help/*.HxT 162 | DocProject/Help/*.HxC 163 | DocProject/Help/*.hhc 164 | DocProject/Help/*.hhk 165 | DocProject/Help/*.hhp 166 | DocProject/Help/Html2 167 | 
DocProject/Help/html 168 | 169 | # Click-Once directory 170 | publish/ 171 | 172 | # Publish Web Output 173 | *.[Pp]ublish.xml 174 | *.azurePubxml 175 | # Note: Comment the next line if you want to checkin your web deploy settings, 176 | # but database connection strings (with potential passwords) will be unencrypted 177 | *.pubxml 178 | *.publishproj 179 | 180 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 181 | # checkin your Azure Web App publish settings, but sensitive information contained 182 | # in these scripts will be unencrypted 183 | PublishScripts/ 184 | 185 | # NuGet Packages 186 | *.nupkg 187 | # NuGet Symbol Packages 188 | *.snupkg 189 | # The packages folder can be ignored because of Package Restore 190 | **/[Pp]ackages/* 191 | # except build/, which is used as an MSBuild target. 192 | !**/[Pp]ackages/build/ 193 | # Uncomment if necessary however generally it will be regenerated when needed 194 | #!**/[Pp]ackages/repositories.config 195 | # NuGet v3's project.json files produces more ignorable files 196 | *.nuget.props 197 | *.nuget.targets 198 | 199 | # Microsoft Azure Build Output 200 | csx/ 201 | *.build.csdef 202 | 203 | # Microsoft Azure Emulator 204 | ecf/ 205 | rcf/ 206 | 207 | # Windows Store app package directories and files 208 | AppPackages/ 209 | BundleArtifacts/ 210 | Package.StoreAssociation.xml 211 | _pkginfo.txt 212 | *.appx 213 | *.appxbundle 214 | *.appxupload 215 | 216 | # Visual Studio cache files 217 | # files ending in .cache can be ignored 218 | *.[Cc]ache 219 | # but keep track of directories ending in .cache 220 | !?*.[Cc]ache/ 221 | 222 | # Others 223 | ClientBin/ 224 | ~$* 225 | *~ 226 | *.dbmdl 227 | *.dbproj.schemaview 228 | *.jfm 229 | *.pfx 230 | *.publishsettings 231 | orleans.codegen.cs 232 | 233 | # Including strong name files can present a security risk 234 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 235 | #*.snk 236 | 237 | # Since there are multiple 
workflows, uncomment next line to ignore bower_components 238 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 239 | #bower_components/ 240 | 241 | # RIA/Silverlight projects 242 | Generated_Code/ 243 | 244 | # Backup & report files from converting an old project file 245 | # to a newer Visual Studio version. Backup files are not needed, 246 | # because we have git ;-) 247 | _UpgradeReport_Files/ 248 | Backup*/ 249 | UpgradeLog*.XML 250 | UpgradeLog*.htm 251 | ServiceFabricBackup/ 252 | *.rptproj.bak 253 | 254 | # SQL Server files 255 | *.mdf 256 | *.ldf 257 | *.ndf 258 | 259 | # Business Intelligence projects 260 | *.rdl.data 261 | *.bim.layout 262 | *.bim_*.settings 263 | *.rptproj.rsuser 264 | *- [Bb]ackup.rdl 265 | *- [Bb]ackup ([0-9]).rdl 266 | *- [Bb]ackup ([0-9][0-9]).rdl 267 | 268 | # Microsoft Fakes 269 | FakesAssemblies/ 270 | 271 | # GhostDoc plugin setting file 272 | *.GhostDoc.xml 273 | 274 | # Node.js Tools for Visual Studio 275 | .ntvs_analysis.dat 276 | node_modules/ 277 | 278 | # Visual Studio 6 build log 279 | *.plg 280 | 281 | # Visual Studio 6 workspace options file 282 | *.opt 283 | 284 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
285 | *.vbw 286 | 287 | # Visual Studio LightSwitch build output 288 | **/*.HTMLClient/GeneratedArtifacts 289 | **/*.DesktopClient/GeneratedArtifacts 290 | **/*.DesktopClient/ModelManifest.xml 291 | **/*.Server/GeneratedArtifacts 292 | **/*.Server/ModelManifest.xml 293 | _Pvt_Extensions 294 | 295 | # Paket dependency manager 296 | .paket/paket.exe 297 | paket-files/ 298 | 299 | # FAKE - F# Make 300 | .fake/ 301 | 302 | # CodeRush personal settings 303 | .cr/personal 304 | 305 | # Python Tools for Visual Studio (PTVS) 306 | __pycache__/ 307 | *.pyc 308 | 309 | # Cake - Uncomment if you are using it 310 | # tools/** 311 | # !tools/packages.config 312 | 313 | # Tabs Studio 314 | *.tss 315 | 316 | # Telerik's JustMock configuration file 317 | *.jmconfig 318 | 319 | # BizTalk build output 320 | *.btp.cs 321 | *.btm.cs 322 | *.odx.cs 323 | *.xsd.cs 324 | 325 | # OpenCover UI analysis results 326 | OpenCover/ 327 | 328 | # Azure Stream Analytics local run output 329 | ASALocalRun/ 330 | 331 | # MSBuild Binary and Structured Log 332 | *.binlog 333 | 334 | # NVidia Nsight GPU debugger configuration file 335 | *.nvuser 336 | 337 | # MFractors (Xamarin productivity tool) working folder 338 | .mfractor/ 339 | 340 | # Local History for Visual Studio 341 | .localhistory/ 342 | 343 | # BeatPulse healthcheck temp database 344 | healthchecksdb 345 | 346 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 347 | MigrationBackup/ 348 | 349 | # Ionide (cross platform F# VS Code tools) working folder 350 | .ionide/ 351 | 352 | # Paket 353 | .paket/ 354 | paket-files/ 355 | 356 | # Documentation 357 | temp/ 358 | output/ 359 | .fsdocs 360 | tmp/ 361 | 362 | # FAKE 363 | .fake -------------------------------------------------------------------------------- /BioProviders.TestsAndDocs.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 
4 | VisualStudioVersion = 16.0.30114.105 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{AFD5787D-8010-4133-82D1-811FAB073EFE}" 7 | EndProject 8 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "ProviderTests", "tests\ProviderTests\ProviderTests.fsproj", "{DCA048A8-D577-4EA2-A1AA-13BFB8F49B4F}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|Any CPU = Debug|Any CPU 13 | Release|Any CPU = Release|Any CPU 14 | EndGlobalSection 15 | GlobalSection(SolutionProperties) = preSolution 16 | HideSolutionNode = FALSE 17 | EndGlobalSection 18 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 19 | {DCA048A8-D577-4EA2-A1AA-13BFB8F49B4F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 20 | {DCA048A8-D577-4EA2-A1AA-13BFB8F49B4F}.Debug|Any CPU.Build.0 = Debug|Any CPU 21 | {DCA048A8-D577-4EA2-A1AA-13BFB8F49B4F}.Release|Any CPU.ActiveCfg = Release|Any CPU 22 | {DCA048A8-D577-4EA2-A1AA-13BFB8F49B4F}.Release|Any CPU.Build.0 = Release|Any CPU 23 | EndGlobalSection 24 | GlobalSection(NestedProjects) = preSolution 25 | {DCA048A8-D577-4EA2-A1AA-13BFB8F49B4F} = {AFD5787D-8010-4133-82D1-811FAB073EFE} 26 | EndGlobalSection 27 | EndGlobal 28 | -------------------------------------------------------------------------------- /BioProviders.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.2.32630.192 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{025231A8-CE38-46CE-A07D-572F437DA3B9}" 7 | EndProject 8 | Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "DesignTime", "src\DesignTime\DesignTime.fsproj", "{6DC356AF-AE85-44C9-8426-234CCE926DAA}" 9 | EndProject 10 | Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "RunTime", 
"src\RunTime\RunTime.fsproj", "{37DB50D8-6BAF-4D54-B576-AF5057D4F752}" 11 | EndProject 12 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{DE391CD5-5005-4872-8E27-DF751DF180BC}" 13 | EndProject 14 | Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "Tests", "tests\Tests\Tests.fsproj", "{95641729-C3CB-4BC3-A82D-B11D9C4D4BB9}" 15 | EndProject 16 | Global 17 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 18 | Debug|Any CPU = Debug|Any CPU 19 | Release|Any CPU = Release|Any CPU 20 | EndGlobalSection 21 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 22 | {6DC356AF-AE85-44C9-8426-234CCE926DAA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 23 | {6DC356AF-AE85-44C9-8426-234CCE926DAA}.Debug|Any CPU.Build.0 = Debug|Any CPU 24 | {6DC356AF-AE85-44C9-8426-234CCE926DAA}.Release|Any CPU.ActiveCfg = Release|Any CPU 25 | {6DC356AF-AE85-44C9-8426-234CCE926DAA}.Release|Any CPU.Build.0 = Release|Any CPU 26 | {37DB50D8-6BAF-4D54-B576-AF5057D4F752}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 27 | {37DB50D8-6BAF-4D54-B576-AF5057D4F752}.Debug|Any CPU.Build.0 = Debug|Any CPU 28 | {37DB50D8-6BAF-4D54-B576-AF5057D4F752}.Release|Any CPU.ActiveCfg = Release|Any CPU 29 | {37DB50D8-6BAF-4D54-B576-AF5057D4F752}.Release|Any CPU.Build.0 = Release|Any CPU 30 | {95641729-C3CB-4BC3-A82D-B11D9C4D4BB9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 31 | {95641729-C3CB-4BC3-A82D-B11D9C4D4BB9}.Debug|Any CPU.Build.0 = Debug|Any CPU 32 | {95641729-C3CB-4BC3-A82D-B11D9C4D4BB9}.Release|Any CPU.ActiveCfg = Release|Any CPU 33 | {95641729-C3CB-4BC3-A82D-B11D9C4D4BB9}.Release|Any CPU.Build.0 = Release|Any CPU 34 | EndGlobalSection 35 | GlobalSection(SolutionProperties) = preSolution 36 | HideSolutionNode = FALSE 37 | EndGlobalSection 38 | GlobalSection(NestedProjects) = preSolution 39 | {6DC356AF-AE85-44C9-8426-234CCE926DAA} = {025231A8-CE38-46CE-A07D-572F437DA3B9} 40 | {37DB50D8-6BAF-4D54-B576-AF5057D4F752} = {025231A8-CE38-46CE-A07D-572F437DA3B9} 41 | 
{95641729-C3CB-4BC3-A82D-B11D9C4D4BB9} = {DE391CD5-5005-4872-8E27-DF751DF180BC} 42 | EndGlobalSection 43 | GlobalSection(ExtensibilityGlobals) = postSolution 44 | SolutionGuid = {D320B7D7-6926-41BC-AF81-DE221CED9CC4} 45 | EndGlobalSection 46 | EndGlobal 47 | -------------------------------------------------------------------------------- /DataFileGenerator.fsx: -------------------------------------------------------------------------------- 1 | #r "nuget: CsvHelper" 2 | #r "nuget: FluentFTP, 34.0.1" 3 | #r "nuget: Goblinfactory.Konsole" 4 | 5 | open CsvHelper 6 | open System.IO 7 | open CsvHelper.Configuration 8 | open System.Globalization 9 | open System.IO.Compression 10 | open FluentFTP 11 | open Konsole 12 | 13 | // ------ Record types used for reading and writing files ------ 14 | // Rows of the original NCBI assembly summary TSV file (used for both GenBank and RefSeq); every column is kept as a raw string. 15 | type FileRow = { 16 | assembly_accession : string 17 | bioproject : string 18 | biosample : string 19 | wgs_master : string 20 | refseq_category : string 21 | taxid : string 22 | species_taxid : string 23 | organism_name : string 24 | infraspecific_name : string 25 | isolate : string 26 | version_status : string 27 | assembly_level : string 28 | release_type : string 29 | genome_rep : string 30 | seq_rel_date : string 31 | asm_name : string 32 | asm_submitter : string 33 | gbrs_paired_asm : string 34 | paired_asm_comp : string 35 | ftp_path : string 36 | excluded_from_refseq : string 37 | relation_to_type_material : string 38 | asm_not_live_date : string 39 | assembly_type : string 40 | group : string 41 | genome_size : string 42 | genome_size_ungapped : string 43 | gc_percent : string 44 | replicon_count : string 45 | scaffold_count : string 46 | contig_count : string 47 | annotation_provider : string 48 | annotation_name : string 49 | annotation_date : string 50 | total_gene_count : string 51 | protein_coding_gene_count : string 52 | non_coding_gene_count : string 53 | pubmed_id : string 54 | } 55 | 56 | // Rows for the generated
// assembly TSV file: one row per assembly, tied to its species by ID.
// NOTE(review): field order is presumably the column order CsvHelper writes;
// keep it unchanged unless the consumer of the files is updated too.
type AssemblyRow =
    { species_id : string
      assembly_accession : string
      ftp_path : string }

// Rows for the generated species TSV file: one row per distinct species.
type SpeciesRow =
    { species_id : string
      species_name : string }

/// The NCBI genome database a run of the generator targets: either GenBank
/// or RefSeq.
type DatabaseName =
    | GenBank
    | RefSeq

    // URL prefix shared by every assembly path of the database. Its length
    // is used to strip that prefix from the paths in the original list.
    member db.GetBasePath() =
        match db with
        | RefSeq -> "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/"
        | GenBank -> "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/"

    // Server-relative location (no host) of the database's assembly
    // summary file on the FTP server.
    member db.GetAssemblyFilePath() =
        match db with
        | RefSeq -> "/genomes/refseq/assembly_summary_refseq.txt"
        | GenBank -> "/genomes/genbank/assembly_summary_genbank.txt"

    // Display name of the database.
    member db.GetName() =
        match db with
        | RefSeq -> "RefSeq"
        | GenBank -> "GenBank"

    // Bare filename of the database's assembly summary file.
    member db.GetFilename() =
        match db with
        | RefSeq -> "assembly_summary_refseq.txt"
        | GenBank -> "assembly_summary_genbank.txt"

// Lookup characters, in processing order: '#' first, then 'a' through 'z'.
// This is a sequence of chars (not an array).
let characters = Seq.append [ '#' ] [ 'a' .. 'z' ]

// ------ Functions for generating and writing data files ------
// Function for matching the first character of a species name.
// Characters that are not letters are treated as a '#'.
108 | let getLookupCharacter (name: string) = 109 | match name.Chars(0) with 110 | | c when System.Char.IsLetter(c) -> System.Char.ToLower(c) 111 | | _ -> '#' 112 | 113 | // Generate a list of distinct species with a unique species ID number for 114 | // each, starting with the specified character. 115 | // Species rows have two properties: 116 | // - The species ID; and 117 | // - The species name. 118 | let getSpeciesList (filteredList : FileRow list) (count : int) = 119 | 120 | // Get a distinct list of species names. 121 | // Also sorts it into alphabetical order. 122 | let distinctList = List.sort (List.distinct (List.map (fun row -> row.organism_name) filteredList)) 123 | 124 | // Return a list of SpeciesRows. 125 | List.mapi (fun i name -> { species_id = (i + count).ToString() ; species_name = name }) distinctList 126 | 127 | // Generate a list of assemblies belonging to the species of a specified 128 | // character, with the correct ID number for their species. 129 | // Assembly rows have three properties: 130 | // - The species ID; 131 | // - The assembly's accession ID; and 132 | // - A path for the FTP location on NCBI's servers. This path does not include 133 | // the base path as the type provider adds that itself. 134 | let getAssemblyList (database : DatabaseName) (filteredList : FileRow list) (speciesList : SpeciesRow list) = 135 | 136 | // Function for finding a species name match for a certain row. 137 | let findNameMatch row = List.tryFind (fun species -> species.species_name.Equals(row.organism_name)) speciesList 138 | 139 | // Filter the CSV rows by those that have one of the organism names in the 140 | // supplied list, and that have a FTP path that isn't "na". 141 | let listWithPaths = List.filter (fun (row : FileRow) -> not (row.ftp_path.Equals("na"))) filteredList 142 | 143 | // Function for sorting a list of AssemblyRows. It should be in the order 144 | // of species IDs, and then the accessions if the IDs are the same. 
145 | let sortAssemblies (assembly1 : AssemblyRow) (assembly2: AssemblyRow) = 146 | match assembly1.species_id.CompareTo(assembly2.species_id) with 147 | | 0 -> assembly1.assembly_accession.CompareTo(assembly2.assembly_accession) 148 | | result -> result 149 | 150 | // Return a (sorted) list of AssemblyRows. 151 | List.sortWith sortAssemblies (List.map (fun row -> { species_id = ((findNameMatch row).Value.species_id) ; assembly_accession = row.assembly_accession ; ftp_path = row.ftp_path.[(String.length (database.GetBasePath()))..] } ) listWithPaths) 152 | 153 | // Compresses a written text file using GZip compression, writes it to a new 154 | // file and deletes the original. 155 | let compressFile (filename : string) = 156 | 157 | // Open the original file. 158 | let originalFile = File.OpenRead(filename) 159 | 160 | // Create a stream for a new GZip file. 161 | let gZipFile = (new FileInfo(filename + ".gz")).Create() 162 | let gZipStream = new GZipStream(gZipFile, CompressionMode.Compress) 163 | 164 | // Send the original file to the GZip stream to create the compressed 165 | // version. 166 | originalFile.CopyTo(gZipStream) 167 | 168 | // Ensure everything is written to the file before closing it. 169 | gZipStream.Flush() 170 | gZipStream.Close() 171 | 172 | // Close the original file and delete it. 173 | originalFile.Close() 174 | File.Delete(filename) 175 | 176 | // Function for writing a list of species or assemblies to a CSV under GZip 177 | // compression. 178 | let writeFile (filename : string) list = 179 | 180 | // Write the records to the file as comma separated values. 181 | let writer = new StreamWriter(filename) 182 | let csv = new CsvWriter(writer, CultureInfo.InvariantCulture) 183 | csv.WriteRecords(list) 184 | 185 | // Ensure everything is written to the file before closing it. 186 | writer.Flush() 187 | writer.Close() 188 | 189 | // Call the function to compress the file. 
190 | compressFile(filename) 191 | 192 | // ------ FTP functions ------ 193 | 194 | /// Creates and uses a connection with the NCBI FTP server. 195 | let internal useNCBIConnection (callback) = 196 | let serverBaseLocation = "ftp://ftp.ncbi.nlm.nih.gov" 197 | use client = new FtpClient(serverBaseLocation) 198 | client.Connect() 199 | callback client 200 | 201 | // Checks if a file exists and if so, whether it is older than the remote 202 | // file. 203 | // - If a file doesn't exist, or is older: return to overwrite existing 204 | // file. 205 | // - Otherwise: try to resume existing file (in case it wasn't 206 | // downloaded fully before). 207 | let isNewerFile (localPath: string) (remotePath: string) (connection: FtpClient) = 208 | if (not (File.Exists(localPath))) then 209 | FtpLocalExists.Overwrite 210 | else 211 | match File.GetLastWriteTime(localPath) > connection.GetModifiedTime(remotePath) with 212 | | true -> 213 | printfn "Previously downloaded file is the most current version. Will continue download if required." 214 | FtpLocalExists.Append 215 | | _ -> 216 | printfn "Remote file is newer than previously downloaded file. Will redownload." 217 | FtpLocalExists.Overwrite 218 | 219 | /// Downloads a file from the NCBI FTP server to the local file system. 220 | let downloadNCBIFile (localPath: string, remotePath: string) = 221 | let downloadFile (connection: FtpClient) = 222 | 223 | // Check if there's a newer file first. 224 | let operation = isNewerFile localPath remotePath connection 225 | 226 | // Controls the progress bar for downloads. 227 | let progressBar = new ProgressBar(100) 228 | let progress = new System.Action(fun x -> 229 | match x.Progress with 230 | | 100.0 -> progressBar.Refresh(100, "Complete.") 231 | | _ -> progressBar.Refresh(int x.Progress, "Downloading...") 232 | ) 233 | 234 | // Check for changed file as well as verification. 
235 | connection.DownloadFile( 236 | localPath, 237 | remotePath, 238 | operation, 239 | FtpVerify.Retry, 240 | progress 241 | ) 242 | 243 | useNCBIConnection downloadFile 244 | 245 | // Creates the path for saving a downloaed NCBI file. 246 | let createDownloadPath (database : DatabaseName) = 247 | (Path.Combine(Path.GetTempPath(), "BioProviders_Build", (database.GetFilename()))) 248 | 249 | // ------ Parsing operations ------ 250 | 251 | // Download the corresponding assembly file from the GenBank FTP server and 252 | // parse it into a set of records. 253 | let getFtpList (database : DatabaseName) = 254 | let downloadedFilePath = createDownloadPath database 255 | printfn "%s summary file will be downloaded to %s." (database.GetName()) downloadedFilePath 256 | 257 | // Attempt to download the file, and then check the status of the download. 258 | let status = downloadNCBIFile (downloadedFilePath, (database.GetAssemblyFilePath())) 259 | match status with 260 | | FtpStatus.Failed -> failwith "Failed to download file from NCBI FTP server." 261 | | FtpStatus.Skipped -> printfn "File already downloaded." 262 | | _ -> printfn "File downloaded successfully." 263 | 264 | printfn "Loading in %s assembly summary TSV..." (database.GetName()) 265 | 266 | // Load in the GenBank file. 267 | let reader = new StreamReader(downloadedFilePath) 268 | 269 | // A function to skip lines that start with ##, to ignore the comment. 270 | let skipFunction (args : ShouldSkipRecordArgs) = 271 | args.Row[0].StartsWith("##") 272 | 273 | // Configuration for the CSV reader. It: 274 | // - Chooses tab as the delimiter; 275 | // - Sets the mode to no escape to ignore quotes; 276 | // - Uses the above function to skip comment lines; and 277 | // - Clear the # symbol on any headers. 
278 | let config = new CsvConfiguration(CultureInfo.InvariantCulture) 279 | config.Delimiter <- "\t" 280 | config.Mode <- CsvMode.NoEscape 281 | config.ShouldSkipRecord <- new ShouldSkipRecord(skipFunction) 282 | config.PrepareHeaderForMatch <- fun args -> args.Header.TrimStart('#') 283 | 284 | // Create a CSV reader object and get all records in the loaded file. 285 | let csv = new CsvReader(reader, config) 286 | let records = Seq.toList (csv.GetRecords()) 287 | 288 | // Show how many records were loaded. 289 | printfn "%s TSV loaded successfully with a total of %i records." (database.GetName()) (List.length records) 290 | 291 | // Close the file and return the records. 292 | reader.Close() 293 | records 294 | 295 | 296 | // Generate a list of species and assembies for the given characater, and write 297 | // them to a file. An integer acculmulator is used to ensure unique numerical 298 | // IDs for all distinct species. 299 | let generateDatabaseLists location (database : DatabaseName) (fullList : FileRow list) (progressBar : ProgressBar) (acc : int) (character : char) = 300 | 301 | // Update the progress bar. 302 | progressBar.Refresh((Seq.findIndex ((=) character) characters), $"Processing \"{character}\" species") 303 | 304 | // Filter the full list of assemblies for only those that have an organism 305 | // name matching the current character. 306 | let filteredList = List.filter (fun row -> (getLookupCharacter row.organism_name).Equals(character)) fullList 307 | 308 | // Generate the lists of species and assemblies for the given character. 309 | let speciesList = (getSpeciesList filteredList acc) 310 | let assemblyList = (getAssemblyList database filteredList speciesList) 311 | 312 | // Ensure that the destination folder exists. 313 | Directory.CreateDirectory(location) |> ignore 314 | 315 | // Generate the filenames for the species and assembly files. 
316 | let speciesFilename = $"{location}{(database.GetName().ToLower())}-species-{character}.txt" 317 | let assemblyFilename = $"{location}{(database.GetName().ToLower())}-assemblies-{character}.txt" 318 | 319 | // Write the species entries to a file. 320 | writeFile speciesFilename speciesList 321 | 322 | // Write the assembly entries to a file. 323 | writeFile assemblyFilename assemblyList 324 | 325 | // Add the number of new species to the accumulator, to start at the 326 | // correct number for the next character. 327 | acc + List.length speciesList 328 | 329 | // Attempts to delete a downloaded NCBI file. 330 | let tryDelete database = 331 | let filename = createDownloadPath database 332 | 333 | // Attempt to delete the file. 334 | try 335 | File.Delete(filename) 336 | printfn "Deleted downloaded file." 337 | with 338 | | :? IOException as ex -> 339 | printfn "Could not delete downloaded file because of exception \"%s\". %s will need to be deleted manually." ex.Message filename 340 | 341 | 342 | // Generates the lists for the specified database. 343 | let generateLists clearCache location (database : DatabaseName) = 344 | printfn "------ Creating lists for %s... ------" (database.GetName()) 345 | try 346 | // Generate the list of records from a downloaded TSV. 347 | let records = getFtpList database 348 | printfn "Generating new lists from loaded %s assembly list..." (database.GetName()) 349 | 350 | // Set up a progress bar to show the progress of parsing records. 351 | let progressBar = new ProgressBar(Seq.length characters) 352 | 353 | // Parse the records to extract the correct properties and save 354 | // them as new compressed lists. 355 | printfn "Generated lists for %i species." (Seq.fold (generateDatabaseLists location database records progressBar) 0 characters) 356 | progressBar.Refresh(Seq.length characters, "All species complete.") 357 | printfn "------ %s operations successful.
------" (database.GetName()) 358 | if (clearCache) then tryDelete database 359 | with 360 | | _ as ex -> 361 | printfn "Encountered exception \"%s\" while trying to generate lists." ex.Message 362 | printfn "------ %s operations failed. ------" (database.GetName()) 363 | 364 | // If the operation failed, the cause might be unrelated to the download, so 365 | // the user might want to keep the file for another attempt. 366 | if (clearCache) then 367 | printfn "Should the downloaded file be deleted? (y/n)" 368 | let userKey = System.Console.ReadKey().KeyChar 369 | match userKey with 370 | | 'y' | 'Y' -> tryDelete database 371 | | 'n' | 'N' -> () 372 | | _ -> printfn "Defaulted to \"n\"." 373 | 374 | // ------ Main program ------ 375 | let args = fsi.CommandLineArgs |> Array.tail 376 | 377 | // Check if the script was executed with a "-saveToCache" argument. 378 | // This saves the files to the "BioProviders" folder in the system temp directory that the 379 | // type provider uses. 380 | let targetFolder = 381 | match Seq.tryFind (fun arg -> arg.Equals("-saveToCache")) args with 382 | | Some value -> Path.Combine(Path.GetTempPath(), "BioProviders\\") 383 | | None -> "./build/data/" 384 | 385 | // Check if the script was executed with a "-keepDownloads" argument. 386 | // This keeps the files that were downloaded to generate the lists in 387 | // a "BioProviders_Build" folder in the system temp directory. 388 | let clearCache = 389 | match Seq.tryFind (fun arg -> arg.Equals("-keepDownloads")) args with 390 | | Some value -> false 391 | | None -> true 392 | 393 | // Begin the process of generating lists. 394 | printfn "------------ Starting operations to generate GenBank and RefSeq data file lists for BioProviders. ------------" 395 | printfn "Save location is %s" targetFolder 396 | generateLists clearCache targetFolder GenBank 397 | generateLists clearCache targetFolder RefSeq 398 | printfn "------------ All operations completed.
------------" -------------------------------------------------------------------------------- /Directory.Build.props: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | The BioProviders library provides tools and functionality to simplify accessing and 6 | manipulating bioinformatic data. This library includes: 7 | 8 | * GenBankProvider -- Type Provider for type-safe access to over 240 million genomic 9 | sequences and their metadata. 10 | 11 | Alex Kenna;Samuel Smith;fsprojects contributors 12 | Copyright 2022 13 | F# fsharp data typeprovider bioinformatics genbank refseq 14 | https://github.com/fsprojects/BioProviders 15 | https://fsprojects.github.io/BioProviders/ 16 | https://raw.githubusercontent.com/fsprojects/FSharp.Formatting/master/docs/files/img/logo.png 17 | MIT 18 | git 19 | 20 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Alex Kenna 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # F# BioProviders 2 | 3 | [![Made with F#](https://img.shields.io/badge/Made%20with-FSharp-rgb(184,69,252).svg)](https://fsharp.org/) [![NuGet Status](https://img.shields.io/nuget/v/BioProviders.svg)](https://www.nuget.org/packages/BioProviders/) [![GitHub contributors](https://img.shields.io/github/contributors/AlexKenna/BioProviders.svg)](https://github.com/AlexKenna/BioProviders/graphs/contributors) 4 | 5 | The F# BioProviders simplify programmatic access to bioinformatics data. 6 | 7 | This library provides strongly-typed access to over 240 million genomic sequences through a set of Type Providers, including the GenBankProvider and RefSeqProvider. For more information, see the detailed [documentation](https://fsprojects.github.io/BioProviders/). 8 | 9 | The BioProviders work by parsing genomic data files using the [.NET Bio](https://github.com/dotnetbio/bio) library, which are then represented using types from the [BioFSharp](https://github.com/CSBiology/BioFSharp) library. 10 | 11 | ## Example 12 | 13 | Below, a simple example of finding the complement of the genomic sequence of a Staphylococcus lugdunensis assembly is provided. 
14 | 15 | ```fsharp 16 | #r "nuget: BioProviders" 17 | 18 | open BioProviders 19 | open BioFSharp 20 | 21 | let [<Literal>] Species = "Staphylococcus lugdunensis" 22 | let [<Literal>] Accession = "GCA_001546615.1" 23 | 24 | let genome = GenBankProvider<Species, Accession>.Genome() 25 | 26 | genome.Sequence |> BioSeq.complement 27 | ``` 28 | 29 | The above code produces the result: 30 | 31 | ```fsharp 32 | BioSeq.BioSeq<Nucleotides.Nucleotide> = seq [C; T; A; C; ...] 33 | 34 | ``` 35 | 36 | ## Building 37 | [![Build Status](https://github.com/AlexKenna/BioProviders/actions/workflows/dotnet.yml/badge.svg)](https://github.com/AlexKenna/BioProviders/actions) 38 | 39 | To build the BioProviders package, perform the following steps: 40 | 41 | * Install the .NET SDK specified in the global.json file 42 | * `build.sh -t Build` or `build.cmd -t Build` 43 | 44 | ## Creating data files 45 | 46 | BioProviders uses a set of data files generated from assembly lists from the NCBI FTP server for species and assembly lookup. 47 | 48 | - To generate these files, run ```dotnet fsi DataFileGenerator.fsx```, which saves the files to ```build\data```. 49 | - Approximately 1 GB of disk space is required for the downloaded NCBI assembly lists. These downloads are deleted on process completion; use the argument ```-keepDownloads``` to keep them. 50 | - To save the files in the type provider's cache folder, use the argument ```-saveToCache```. 51 | - By default, the package downloads files from this repository to ```AppData\Local\Temp\BioProviders```. To change this for your own version, change the URL in the file ```remote.txt``` in ```src\DesignTime```. 52 | 53 | ## Formatting 54 | 55 | The BioProviders package code is formatted using [fantomas](https://fsprojects.github.io/fantomas/). 56 | 57 | * To format the code, run `build.sh -t Format` or `build.cmd -t Format` 58 | * To check formatting, run `build.sh -t CheckFormat` or `build.cmd -t CheckFormat` 59 | 60 | ## License 61 | 62 | BioProviders is covered by the MIT license.
63 | 64 | The package also uses: 65 | - [BioFSharp](https://github.com/CSBiology/BioFSharp) - MIT license 66 | - [.NET Bio](https://github.com/dotnetbio/bio) - Apache-2.0 license 67 | - [FluentFTP](https://github.com/robinrodricks/FluentFTP) - MIT license 68 | - [FSharp.Data](https://github.com/fsprojects/FSharp.Data/) - Apache-2.0 license 69 | 70 | ## Maintainers 71 | 72 | Current maintainers are [Alex Kenna](https://github.com/AlexKenna), [Samuel Smith](https://github.com/n7581769) and [James Hogan](https://github.com/jamesmhogan). 73 | -------------------------------------------------------------------------------- /RELEASE_NOTES.md: -------------------------------------------------------------------------------- 1 | ### 2.2.1 - Nov 2 2023 2 | * Fixed exception thrown when BioProviders folder does not exist in AppData/Local/Temp and the type provider attempts to download a data file. 3 | 4 | ### 2.2.0 - Oct 24 2023 5 | * Added RefSeq type provider. 6 | 7 | ### 2.1.0 - Oct 9 2023 8 | * Fixed bug in previous versions that prevented data files from being accessed. 9 | * Data files are no longer included to reduce package size - automatically downloaded on demand. 10 | * Cached files in temporary folder are removed after 90 days if not used within that time. 11 | 12 | ### 2.0.0 - Feb 2 2023 13 | * Only species name and accession must now be provided to GenBankProvider. 14 | * Added support for wildcards in both species name and accession. 15 | * General changes to the GenBankProvider structure. 16 | 17 | ### 1.0.0 - Jun 30 2022 18 | * Initial release. 19 | * GenBankProvider included. 
-------------------------------------------------------------------------------- /build.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | dotnet tool restore 4 | dotnet paket restore 5 | dotnet fake build %* -------------------------------------------------------------------------------- /build.fsx: -------------------------------------------------------------------------------- 1 | // -------------------------------------------------------------------------------------- 2 | // FAKE Build Script. 3 | // -------------------------------------------------------------------------------------- 4 | 5 | #r "paket: groupref build //" 6 | 7 | #if !FAKE 8 | #load ".fake/build.fsx/intellisense.fsx" 9 | #r "netstandard" 10 | #endif 11 | 12 | open System 13 | open System.IO 14 | open Fake.Core 15 | open Fake.DotNet 16 | open Fake.DotNet.NuGet 17 | open Fake.DotNet.Testing 18 | open Fake.IO 19 | open Fake.IO.FileSystemOperators 20 | open Fake.IO.Globbing.Operators 21 | open Fake.Core.TargetOperators 22 | open Fake.Tools.Git 23 | 24 | Environment.CurrentDirectory <- __SOURCE_DIRECTORY__ 25 | 26 | let (!!) includes = 27 | (!!includes).SetBaseDirectory __SOURCE_DIRECTORY__ 28 | 29 | 30 | // -------------------------------------------------------------------------------------- 31 | // Project Information. 32 | // -------------------------------------------------------------------------------------- 33 | 34 | let project = "BioProviders" 35 | let authors = "Alex Kenna;Samuel Smith;fsprojects contributors" 36 | let summary = "F# library for accessing and manipulating bioinformatic datasets." 37 | 38 | let description = 39 | """ 40 | The BioProviders library provides tools and functionality to simplify accessing and 41 | manipulating bioinformatic data. 
This library includes: 42 | 43 | * GenBankProvider — Type Provider for type-safe access to the genomic sequences and 44 | their metadata for over 500,000 species in the GenBank database. 45 | * RefSeqProvider — Type Provider for type-safe access to the genomic sequences and 46 | their metadata for over 140,000 species in the RefSeq database. 47 | """ 48 | 49 | let tags = "F# fsharp data typeprovider bioinformatics genbank refseq" 50 | 51 | let gitOwner = "fsprojects" 52 | let gitHome = "https://github.com/" + gitOwner 53 | let gitName = "BioProviders" 54 | 55 | let packageProjectUrl = "https://fsprojects.github.io/BioProviders/" 56 | let repositoryType = "git" 57 | let repositoryUrl = "https://github.com/fsprojects/BioProviders" 58 | let license = "MIT" 59 | 60 | // Read release notes & version info from RELEASE_NOTES.md 61 | let release = ReleaseNotes.load "RELEASE_NOTES.md" 62 | 63 | let isCI = Environment.GetEnvironmentVariable("CI") <> null 64 | 65 | 66 | // -------------------------------------------------------------------------------------- 67 | // Generate Assembly Information. 68 | // -------------------------------------------------------------------------------------- 69 | 70 | Target.create "AssemblyInfo" (fun _ -> 71 | for file in !! "src/AssemblyInfo*.fs" do 72 | let replace (oldValue: string) newValue (str: string) = str.Replace(oldValue, newValue) 73 | 74 | let title = 75 | Path.GetFileNameWithoutExtension file |> replace "AssemblyInfo" "BioProviders" 76 | 77 | let versionSuffix = ".0" 78 | let version = release.AssemblyVersion + versionSuffix 79 | 80 | AssemblyInfoFile.createFSharp 81 | file 82 | [ AssemblyInfo.Title title 83 | AssemblyInfo.Product project 84 | AssemblyInfo.Description summary 85 | AssemblyInfo.Version version 86 | AssemblyInfo.FileVersion version ]) 87 | 88 | 89 | // -------------------------------------------------------------------------------------- 90 | // Clean Build Results. 
91 | // -------------------------------------------------------------------------------------- 92 | 93 | Target.create "Clean" (fun _ -> 94 | seq { 95 | yield! !! "**/bin" 96 | yield! !! "**/obj" 97 | yield! !! "**/temp" 98 | } 99 | |> Shell.cleanDirs) 100 | 101 | Target.create "CleanDocs" (fun _ -> Shell.cleanDirs [ "output" ]) 102 | 103 | 104 | // -------------------------------------------------------------------------------------- 105 | // Build Library & Test Projects 106 | // -------------------------------------------------------------------------------------- 107 | 108 | Target.create "Build" (fun _ -> 109 | "BioProviders.sln" 110 | |> DotNet.build (fun o -> 111 | { o with 112 | Configuration = DotNet.BuildConfiguration.Release }) 113 | 114 | "BioProviders.TestsAndDocs.sln" 115 | |> DotNet.build (fun o -> 116 | { o with 117 | Configuration = DotNet.BuildConfiguration.Release })) 118 | 119 | Target.create "RunTests" (fun _ -> 120 | let setParams (o: DotNet.TestOptions) = 121 | { o with 122 | Configuration = DotNet.BuildConfiguration.Release 123 | Logger = if isCI then Some "GitHubActions" else None } 124 | 125 | "BioProviders.sln" |> DotNet.test setParams 126 | 127 | "BioProviders.TestsAndDocs.sln" |> DotNet.test setParams) 128 | 129 | 130 | // -------------------------------------------------------------------------------------- 131 | // Build Packages. 
132 | // -------------------------------------------------------------------------------------- 133 | 134 | Target.create "Pack" (fun _ -> 135 | // Format the release notes 136 | let releaseNotes = release.Notes |> String.concat "\n" 137 | 138 | let properties = 139 | [ ("Version", release.NugetVersion) 140 | ("Authors", authors) 141 | ("PackageProjectUrl", packageProjectUrl) 142 | ("PackageTags", tags) 143 | ("RepositoryType", repositoryType) 144 | ("RepositoryUrl", repositoryUrl) 145 | ("PackageLicenseExpression", license) 146 | ("PackageReleaseNotes", releaseNotes) 147 | ("Summary", summary) 148 | ("PackageDescription", description) ] 149 | 150 | DotNet.pack 151 | (fun p -> 152 | { p with 153 | Configuration = DotNet.BuildConfiguration.Release 154 | OutputPath = Some "bin" 155 | MSBuildParams = 156 | { p.MSBuildParams with 157 | Properties = properties } }) 158 | "BioProviders.sln") 159 | 160 | 161 | // -------------------------------------------------------------------------------------- 162 | // Generate Documentation. 
163 | // -------------------------------------------------------------------------------------- 164 | 165 | Target.create "GenerateDocs" (fun _ -> 166 | printfn "First run of generating documentation (to get API pages)" 167 | Shell.cleanDir ".fsdocs" 168 | 169 | let result1 = 170 | DotNet.exec 171 | id 172 | "fsdocs" 173 | ("build --properties Configuration=Release --eval --clean --parameters fsdocs-package-version " 174 | + release.NugetVersion) 175 | 176 | if not result1.OK then 177 | printfn "Errors while generating docs: %A" result1.Messages 178 | failwith "Failed to generate docs" 179 | 180 | printfn "Moving API pages before cleaning previous output" 181 | 182 | if (Directory.Exists("temp")) then 183 | Shell.cleanDir ("temp") 184 | else 185 | Directory.create ("temp") 186 | 187 | Directory.Move("output/reference", "temp/reference") 188 | printfn "Second run of generating documentation (to get script output)" 189 | Shell.cleanDir ".fsdocs" 190 | Shell.cleanDirs [ "output" ] 191 | 192 | let result2 = 193 | DotNet.exec 194 | id 195 | "fsdocs" 196 | ("build --properties Configuration=Release --eval --noapidocs --clean --parameters fsdocs-package-version " 197 | + release.NugetVersion) 198 | 199 | if not result2.OK then 200 | printfn "Errors while generating docs: %A" result2.Messages 201 | failwith "Failed to generate docs" 202 | 203 | printfn "Moving previous API pages to output" 204 | Directory.Move("temp/reference", "output/reference") 205 | printfn "Deleting temp folder" 206 | Directory.Delete("temp")) 207 | 208 | 209 | // -------------------------------------------------------------------------------------- 210 | // Help. 
211 | // -------------------------------------------------------------------------------------- 212 | 213 | Target.create "Help" (fun _ -> 214 | printfn "" 215 | printfn " Please specify the target by calling 'build -t '" 216 | printfn "" 217 | printfn " Targets for building:" 218 | printfn " * Build" 219 | printfn " * RunTests" 220 | printfn " * GenerateDocs" 221 | printfn " * Pack (creates package only, doesn't publish)" 222 | printfn " * All (calls previous 4)" 223 | printfn "") 224 | 225 | let sourceFiles = 226 | !! "src/**/*.fs" ++ "src/**/*.fsi" ++ "build.fsx" 227 | -- "src/**/obj/**/*.fs" 228 | -- "src/AssemblyInfo*.fs" 229 | 230 | Target.create "Format" (fun _ -> 231 | let result = 232 | sourceFiles 233 | |> Seq.map (sprintf "\"%s\"") 234 | |> String.concat " " 235 | |> DotNet.exec id "fantomas" 236 | 237 | if not result.OK then 238 | printfn "Errors while formatting all files: %A" result.Messages) 239 | 240 | Target.create "CheckFormat" (fun _ -> 241 | let result = 242 | sourceFiles 243 | |> Seq.map (sprintf "\"%s\"") 244 | |> String.concat " " 245 | |> sprintf "%s --check" 246 | |> DotNet.exec id "fantomas" 247 | 248 | if result.ExitCode = 0 then 249 | Trace.log "No files need formatting" 250 | elif result.ExitCode = 99 then 251 | failwith "Some files need formatting, run `dotnet fake build -t Format` to format them" 252 | else 253 | Trace.logf "Errors while formatting: %A" result.Errors 254 | failwith "Unknown errors while formatting") 255 | 256 | Target.create "All" ignore 257 | 258 | "Clean" ==> "AssemblyInfo" ==> "CheckFormat" ==> "Build" 259 | 260 | "Build" ==> "CleanDocs" ==> "GenerateDocs" ==> "All" 261 | 262 | "Build" ==> "Pack" ==> "All" 263 | "Build" ==> "All" 264 | "Build" ==> "RunTests" ==> "All" 265 | 266 | Target.runOrDefaultWithArguments "Help" 267 | -------------------------------------------------------------------------------- /build.fsx.lock: -------------------------------------------------------------------------------- 1 | STORAGE: 
NONE 2 | RESTRICTION: || (== net6.0) (== netstandard2.0) 3 | NUGET 4 | remote: https://api.nuget.org/v3/index.json 5 | BlackFox.VsWhere (1.1) 6 | FSharp.Core (>= 4.2.3) 7 | Microsoft.Win32.Registry (>= 4.7) 8 | Fake.Core.CommandLineParsing (5.23.1) 9 | FParsec (>= 1.1.1) 10 | FSharp.Core (>= 6.0) 11 | Fake.Core.Context (5.23.1) 12 | FSharp.Core (>= 6.0) 13 | Fake.Core.Environment (5.23.1) 14 | FSharp.Core (>= 6.0) 15 | Fake.Core.FakeVar (5.23.1) 16 | Fake.Core.Context (>= 5.23.1) 17 | FSharp.Core (>= 6.0) 18 | Fake.Core.Process (5.23.1) 19 | Fake.Core.Environment (>= 5.23.1) 20 | Fake.Core.FakeVar (>= 5.23.1) 21 | Fake.Core.String (>= 5.23.1) 22 | Fake.Core.Trace (>= 5.23.1) 23 | Fake.IO.FileSystem (>= 5.23.1) 24 | FSharp.Core (>= 6.0) 25 | System.Collections.Immutable (>= 5.0) 26 | Fake.Core.SemVer (5.23.1) 27 | FSharp.Core (>= 6.0) 28 | Fake.Core.String (5.23.1) 29 | FSharp.Core (>= 6.0) 30 | Fake.Core.Target (5.23.1) 31 | Fake.Core.CommandLineParsing (>= 5.23.1) 32 | Fake.Core.Context (>= 5.23.1) 33 | Fake.Core.Environment (>= 5.23.1) 34 | Fake.Core.FakeVar (>= 5.23.1) 35 | Fake.Core.Process (>= 5.23.1) 36 | Fake.Core.String (>= 5.23.1) 37 | Fake.Core.Trace (>= 5.23.1) 38 | FSharp.Control.Reactive (>= 5.0.2) 39 | FSharp.Core (>= 6.0) 40 | Fake.Core.Tasks (5.23.1) 41 | Fake.Core.Trace (>= 5.23.1) 42 | FSharp.Core (>= 6.0) 43 | Fake.Core.Trace (5.23.1) 44 | Fake.Core.Environment (>= 5.23.1) 45 | Fake.Core.FakeVar (>= 5.23.1) 46 | FSharp.Core (>= 6.0) 47 | Fake.Core.Xml (5.23.1) 48 | Fake.Core.String (>= 5.23.1) 49 | FSharp.Core (>= 6.0) 50 | Fake.DotNet.Cli (5.23.1) 51 | Fake.Core.Environment (>= 5.23.1) 52 | Fake.Core.Process (>= 5.23.1) 53 | Fake.Core.String (>= 5.23.1) 54 | Fake.Core.Trace (>= 5.23.1) 55 | Fake.DotNet.MSBuild (>= 5.23.1) 56 | Fake.DotNet.NuGet (>= 5.23.1) 57 | Fake.IO.FileSystem (>= 5.23.1) 58 | FSharp.Core (>= 6.0) 59 | Mono.Posix.NETStandard (>= 1.0) 60 | Newtonsoft.Json (>= 13.0.1) 61 | Fake.DotNet.MSBuild (5.23.1) 62 | BlackFox.VsWhere (>= 
1.1) 63 | Fake.Core.Environment (>= 5.23.1) 64 | Fake.Core.Process (>= 5.23.1) 65 | Fake.Core.String (>= 5.23.1) 66 | Fake.Core.Trace (>= 5.23.1) 67 | Fake.IO.FileSystem (>= 5.23.1) 68 | FSharp.Core (>= 6.0) 69 | MSBuild.StructuredLogger (>= 2.1.545) 70 | Fake.DotNet.NuGet (5.23.1) 71 | Fake.Core.Environment (>= 5.23.1) 72 | Fake.Core.Process (>= 5.23.1) 73 | Fake.Core.SemVer (>= 5.23.1) 74 | Fake.Core.String (>= 5.23.1) 75 | Fake.Core.Tasks (>= 5.23.1) 76 | Fake.Core.Trace (>= 5.23.1) 77 | Fake.Core.Xml (>= 5.23.1) 78 | Fake.IO.FileSystem (>= 5.23.1) 79 | Fake.Net.Http (>= 5.23.1) 80 | FSharp.Core (>= 6.0) 81 | Newtonsoft.Json (>= 13.0.1) 82 | NuGet.Protocol (>= 5.11) 83 | Fake.IO.FileSystem (5.23.1) 84 | Fake.Core.String (>= 5.23.1) 85 | FSharp.Core (>= 6.0) 86 | Fake.Net.Http (5.23.1) 87 | Fake.Core.Trace (>= 5.23.1) 88 | FSharp.Core (>= 6.0) 89 | FParsec (1.1.1) 90 | FSharp.Core (>= 4.3.4) 91 | FSharp.Control.Reactive (5.0.5) 92 | FSharp.Core (>= 4.7.2) 93 | System.Reactive (>= 5.0 < 6.0) 94 | FSharp.Core (7.0) 95 | Microsoft.Build (17.4) 96 | Microsoft.Build.Framework (17.4) 97 | Microsoft.Win32.Registry (>= 5.0) 98 | System.Security.Permissions (>= 6.0) 99 | Microsoft.Build.Tasks.Core (17.4) 100 | Microsoft.Build.Framework (>= 17.4) 101 | Microsoft.Build.Utilities.Core (>= 17.4) 102 | Microsoft.NET.StringTools (>= 17.4) 103 | Microsoft.Win32.Registry (>= 5.0) 104 | System.CodeDom (>= 6.0) 105 | System.Collections.Immutable (>= 6.0) 106 | System.Reflection.Metadata (>= 6.0) 107 | System.Resources.Extensions (>= 6.0) 108 | System.Security.Cryptography.Pkcs (>= 6.0.1) 109 | System.Security.Cryptography.Xml (>= 6.0) 110 | System.Security.Permissions (>= 6.0) 111 | System.Threading.Tasks.Dataflow (>= 6.0) 112 | Microsoft.Build.Utilities.Core (17.4) 113 | Microsoft.Build.Framework (>= 17.4) 114 | Microsoft.NET.StringTools (>= 17.4) 115 | Microsoft.Win32.Registry (>= 5.0) 116 | System.Collections.Immutable (>= 6.0) 117 | System.Configuration.ConfigurationManager (>= 
6.0) 118 | System.Security.Permissions (>= 6.0) 119 | System.Text.Encoding.CodePages (>= 6.0) 120 | Microsoft.NET.StringTools (17.4) 121 | System.Memory (>= 4.5.5) 122 | System.Runtime.CompilerServices.Unsafe (>= 6.0) 123 | Microsoft.NETCore.Platforms (7.0) - restriction: || (&& (== net6.0) (< netcoreapp3.1)) (&& (== net6.0) (< netstandard1.2)) (&& (== net6.0) (< netstandard1.3)) (&& (== net6.0) (< netstandard1.5)) (== netstandard2.0) 124 | Microsoft.NETCore.Targets (5.0) - restriction: || (&& (== net6.0) (< netcoreapp3.1)) (&& (== net6.0) (< netstandard1.2)) (&& (== net6.0) (< netstandard1.3)) (&& (== net6.0) (< netstandard1.5)) (== netstandard2.0) 125 | Microsoft.Win32.Registry (5.0) 126 | System.Buffers (>= 4.5.1) - restriction: || (&& (== net6.0) (>= monoandroid) (< netstandard1.3)) (&& (== net6.0) (>= monotouch)) (&& (== net6.0) (< netcoreapp2.0)) (&& (== net6.0) (>= xamarinios)) (&& (== net6.0) (>= xamarinmac)) (&& (== net6.0) (>= xamarintvos)) (&& (== net6.0) (>= xamarinwatchos)) (== netstandard2.0) 127 | System.Memory (>= 4.5.4) - restriction: || (&& (== net6.0) (< netcoreapp2.0)) (&& (== net6.0) (< netcoreapp2.1)) (&& (== net6.0) (>= uap10.1)) (== netstandard2.0) 128 | System.Security.AccessControl (>= 5.0) 129 | System.Security.Principal.Windows (>= 5.0) 130 | Microsoft.Win32.SystemEvents (7.0) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= net6.0)) 131 | Mono.Posix.NETStandard (1.0) 132 | MSBuild.StructuredLogger (2.1.758) 133 | Microsoft.Build (>= 16.10) 134 | Microsoft.Build.Framework (>= 16.10) 135 | Microsoft.Build.Tasks.Core (>= 16.10) 136 | Microsoft.Build.Utilities.Core (>= 16.10) 137 | Newtonsoft.Json (13.0.2) 138 | NuGet.Common (6.4) 139 | NuGet.Frameworks (>= 6.4) 140 | NuGet.Configuration (6.4) 141 | NuGet.Common (>= 6.4) 142 | System.Security.Cryptography.ProtectedData (>= 4.4) 143 | NuGet.Frameworks (6.4) 144 | NuGet.Packaging (6.4) 145 | Newtonsoft.Json (>= 13.0.1) 146 | NuGet.Configuration (>= 6.4) 147 | NuGet.Versioning (>= 
6.4) 148 | System.Security.Cryptography.Cng (>= 5.0) 149 | System.Security.Cryptography.Pkcs (>= 5.0) 150 | NuGet.Protocol (6.4) 151 | NuGet.Packaging (>= 6.4) 152 | NuGet.Versioning (6.4) 153 | System.Buffers (4.5.1) - restriction: || (&& (== net6.0) (>= net462)) (== netstandard2.0) 154 | System.CodeDom (7.0) 155 | System.Collections.Immutable (7.0) 156 | System.Memory (>= 4.5.5) - restriction: || (&& (== net6.0) (>= net462)) (== netstandard2.0) 157 | System.Runtime.CompilerServices.Unsafe (>= 6.0) 158 | System.Configuration.ConfigurationManager (7.0) 159 | System.Security.Cryptography.ProtectedData (>= 7.0) 160 | System.Security.Permissions (>= 7.0) 161 | System.Drawing.Common (7.0) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= net6.0)) 162 | Microsoft.Win32.SystemEvents (>= 7.0) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= net6.0)) 163 | System.Formats.Asn1 (7.0) 164 | System.Buffers (>= 4.5.1) - restriction: || (&& (== net6.0) (>= net462)) (== netstandard2.0) 165 | System.Memory (>= 4.5.5) - restriction: || (&& (== net6.0) (>= net462)) (== netstandard2.0) 166 | System.Memory (4.5.5) 167 | System.Buffers (>= 4.5.1) - restriction: || (&& (== net6.0) (>= monotouch)) (&& (== net6.0) (>= net461)) (&& (== net6.0) (< netcoreapp2.0)) (&& (== net6.0) (< netstandard1.1)) (&& (== net6.0) (< netstandard2.0)) (&& (== net6.0) (>= xamarinios)) (&& (== net6.0) (>= xamarinmac)) (&& (== net6.0) (>= xamarintvos)) (&& (== net6.0) (>= xamarinwatchos)) (== netstandard2.0) 168 | System.Numerics.Vectors (>= 4.4) - restriction: || (&& (== net6.0) (< netcoreapp2.0)) (== netstandard2.0) 169 | System.Runtime.CompilerServices.Unsafe (>= 4.5.3) - restriction: || (&& (== net6.0) (>= monotouch)) (&& (== net6.0) (>= net461)) (&& (== net6.0) (< netcoreapp2.0)) (&& (== net6.0) (< netcoreapp2.1)) (&& (== net6.0) (< netstandard1.1)) (&& (== net6.0) (< netstandard2.0)) (&& (== net6.0) (>= uap10.1)) (&& (== net6.0) (>= xamarinios)) (&& (== net6.0) (>= xamarinmac)) (&& (== 
net6.0) (>= xamarintvos)) (&& (== net6.0) (>= xamarinwatchos)) (== netstandard2.0) 170 | System.Numerics.Vectors (4.5) - restriction: || (&& (== net6.0) (>= net462)) (== netstandard2.0) 171 | System.Reactive (5.0) 172 | System.Runtime.InteropServices.WindowsRuntime (>= 4.3) - restriction: || (&& (== net6.0) (< netcoreapp3.1)) (== netstandard2.0) 173 | System.Threading.Tasks.Extensions (>= 4.5.4) - restriction: || (&& (== net6.0) (>= net472)) (&& (== net6.0) (< netcoreapp3.1)) (&& (== net6.0) (>= uap10.1)) (== netstandard2.0) 174 | System.Reflection.Metadata (7.0) 175 | System.Collections.Immutable (>= 7.0) 176 | System.Memory (>= 4.5.5) - restriction: || (&& (== net6.0) (>= net462)) (== netstandard2.0) 177 | System.Resources.Extensions (7.0) 178 | System.Memory (>= 4.5.5) - restriction: || (&& (== net6.0) (>= net462)) (== netstandard2.0) 179 | System.Runtime (4.3.1) - restriction: || (&& (== net6.0) (< netcoreapp3.1)) (== netstandard2.0) 180 | Microsoft.NETCore.Platforms (>= 1.1.1) 181 | Microsoft.NETCore.Targets (>= 1.1.3) 182 | System.Runtime.CompilerServices.Unsafe (6.0) 183 | System.Runtime.InteropServices.WindowsRuntime (4.3) - restriction: || (&& (== net6.0) (< netcoreapp3.1)) (== netstandard2.0) 184 | System.Runtime (>= 4.3) 185 | System.Security.AccessControl (6.0) 186 | System.Security.Principal.Windows (>= 5.0) - restriction: || (&& (== net6.0) (>= net461)) (== netstandard2.0) 187 | System.Security.Cryptography.Cng (5.0) 188 | System.Formats.Asn1 (>= 5.0) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= netcoreapp3.0)) 189 | System.Security.Cryptography.Pkcs (7.0) 190 | System.Buffers (>= 4.5.1) - restriction: || (&& (== net6.0) (< netstandard2.1)) (== netstandard2.0) 191 | System.Formats.Asn1 (>= 7.0) 192 | System.Memory (>= 4.5.5) - restriction: || (&& (== net6.0) (< netstandard2.1)) (== netstandard2.0) 193 | System.Security.Cryptography.Cng (>= 5.0) - restriction: || (&& (== net6.0) (< netstandard2.1)) (== netstandard2.0) 194 | 
System.Security.Cryptography.ProtectedData (7.0) 195 | System.Security.Cryptography.Xml (7.0.1) 196 | System.Memory (>= 4.5.5) - restriction: == netstandard2.0 197 | System.Security.AccessControl (>= 6.0) - restriction: == netstandard2.0 198 | System.Security.Cryptography.Pkcs (>= 7.0) 199 | System.Security.Permissions (7.0) 200 | System.Security.AccessControl (>= 6.0) - restriction: || (&& (== net6.0) (>= net462)) (== netstandard2.0) 201 | System.Windows.Extensions (>= 7.0) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= net6.0)) 202 | System.Security.Principal.Windows (5.0) 203 | System.Text.Encoding.CodePages (7.0) 204 | System.Memory (>= 4.5.5) - restriction: || (&& (== net6.0) (>= net462)) (== netstandard2.0) 205 | System.Runtime.CompilerServices.Unsafe (>= 6.0) 206 | System.Threading.Tasks.Dataflow (7.0) 207 | System.Threading.Tasks.Extensions (4.5.4) - restriction: || (&& (== net6.0) (>= net472)) (&& (== net6.0) (< netcoreapp3.1)) (&& (== net6.0) (>= uap10.1)) (== netstandard2.0) 208 | System.Runtime.CompilerServices.Unsafe (>= 4.5.3) - restriction: || (&& (== net6.0) (>= net461)) (&& (== net6.0) (< netcoreapp2.1)) (&& (== net6.0) (< netstandard1.0)) (&& (== net6.0) (< netstandard2.0)) (&& (== net6.0) (>= wp8)) (== netstandard2.0) 209 | System.Windows.Extensions (7.0) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= net6.0)) 210 | System.Drawing.Common (>= 7.0) - restriction: || (== net6.0) (&& (== netstandard2.0) (>= net6.0)) 211 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -eu 4 | set -o pipefail 5 | 6 | dotnet tool restore 7 | dotnet paket restore 8 | dotnet fake build -t "$@" -------------------------------------------------------------------------------- /build/data/genbank-assemblies-#.txt.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-#.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-a.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-a.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-b.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-b.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-c.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-c.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-d.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-d.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-e.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-e.txt.gz 
-------------------------------------------------------------------------------- /build/data/genbank-assemblies-f.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-f.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-g.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-g.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-h.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-h.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-i.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-i.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-j.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-j.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-k.txt.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-k.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-l.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-l.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-m.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-m.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-n.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-n.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-o.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-o.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-p.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-p.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-q.txt.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-q.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-r.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-r.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-s.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-s.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-t.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-t.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-u.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-u.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-v.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-v.txt.gz 
-------------------------------------------------------------------------------- /build/data/genbank-assemblies-w.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-w.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-x.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-x.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-y.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-y.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-assemblies-z.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-assemblies-z.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-#.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-#.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-a.txt.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-a.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-b.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-b.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-c.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-c.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-d.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-d.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-e.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-e.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-f.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-f.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-g.txt.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-g.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-h.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-h.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-i.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-i.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-j.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-j.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-k.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-k.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-l.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-l.txt.gz -------------------------------------------------------------------------------- 
/build/data/genbank-species-m.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-m.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-n.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-n.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-o.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-o.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-p.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-p.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-q.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-q.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-r.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-r.txt.gz 
-------------------------------------------------------------------------------- /build/data/genbank-species-s.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-s.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-t.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-t.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-u.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-u.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-v.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-v.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-w.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-w.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-x.txt.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-x.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-y.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-y.txt.gz -------------------------------------------------------------------------------- /build/data/genbank-species-z.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/genbank-species-z.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-#.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-#.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-a.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-a.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-b.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-b.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-c.txt.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-c.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-d.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-d.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-e.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-e.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-f.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-f.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-g.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-g.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-h.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-h.txt.gz -------------------------------------------------------------------------------- 
/build/data/refseq-assemblies-i.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-i.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-j.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-j.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-k.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-k.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-l.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-l.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-m.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-m.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-n.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-n.txt.gz 
-------------------------------------------------------------------------------- /build/data/refseq-assemblies-o.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-o.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-p.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-p.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-q.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-q.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-r.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-r.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-s.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-s.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-t.txt.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-t.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-u.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-u.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-v.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-v.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-w.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-w.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-x.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-x.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-y.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-y.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-assemblies-z.txt.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-assemblies-z.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-#.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-#.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-a.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-a.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-b.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-b.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-c.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-c.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-d.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-d.txt.gz -------------------------------------------------------------------------------- 
/build/data/refseq-species-e.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-e.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-f.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-f.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-g.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-g.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-h.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-h.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-i.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-i.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-j.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-j.txt.gz -------------------------------------------------------------------------------- 
/build/data/refseq-species-k.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-k.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-l.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-l.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-m.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-m.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-n.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-n.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-o.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-o.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-p.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-p.txt.gz -------------------------------------------------------------------------------- 
/build/data/refseq-species-q.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-q.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-r.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-r.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-s.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-s.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-t.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-t.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-u.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-u.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-v.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-v.txt.gz -------------------------------------------------------------------------------- 
/build/data/refseq-species-w.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-w.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-x.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-x.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-y.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-y.txt.gz -------------------------------------------------------------------------------- /build/data/refseq-species-z.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/build/data/refseq-species-z.txt.gz -------------------------------------------------------------------------------- /docs/_template.fsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/docs/_template.fsx -------------------------------------------------------------------------------- /docs/_template.html: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | {{fsdocs-page-title}} 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 25 | 26 | 27 | 28 | {{fsdocs-watch-script}} 29 | 30 | 31 | 32 | 64 |
65 | 68 |
69 |
70 | {{fsdocs-content}} 71 | {{fsdocs-tooltips}} 72 |
73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 |
82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /docs/_template.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/docs/_template.ipynb -------------------------------------------------------------------------------- /docs/img/GenBankProvider.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/docs/img/GenBankProvider.gif -------------------------------------------------------------------------------- /docs/img/GenBank_Info.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/docs/img/GenBank_Info.gif -------------------------------------------------------------------------------- /docs/img/RefSeq_Info.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/docs/img/RefSeq_Info.gif -------------------------------------------------------------------------------- /docs/img/badge-notebook.svg: -------------------------------------------------------------------------------- 1 | Download notebookDownload notebook -------------------------------------------------------------------------------- /docs/img/badge-script.svg: -------------------------------------------------------------------------------- 1 | Download scriptDownload script -------------------------------------------------------------------------------- /docs/img/logo.pdn: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/docs/img/logo.pdn -------------------------------------------------------------------------------- /docs/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsprojects/BioProviders/995ed64aca70cda3ffeb42556c0104ac7105b242/docs/img/logo.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | BioProviders: Simplifying Access to Bioinformatic Datasets 2 | ================================ 3 | 4 | The BioProviders package provides tools and functionality to simplify accessing and manipulating bioinformatic data. 5 | The [.NET Bio](https://github.com/dotnetbio/bio) and [BioFSharp](https://github.com/CSBiology/BioFSharp) libraries 6 | are used to parse and format the data provided by this package. 7 | 8 | BioProviders is available through [NuGet](https://nuget.org/packages/BioProviders). 9 | [![NuGet Status](//img.shields.io/nuget/v/BioProviders.svg?style=flat)](https://www.nuget.org/packages/BioProviders/) 10 | 11 | 12 | ## Type Providers 13 | 14 |
15 |
16 |
17 |
18 | 19 |
20 |
21 |
22 |
23 | 24 | BioProviders implements Type Providers for accessing bioinformatic datasets. These Type Providers allow remote access 25 | to data sources (e.g., GenBank) and type-safe representations of their data (e.g., GenBank Flat File). 26 | 27 | * [GenBank Type Provider](library/GenBankProvider.html) - access to GenBank data using the `GenBankProvider<..>` type. 28 | * [RefSeq Type Provider](library/RefSeqProvider.html) - access to RefSeq data using the `RefSeqProvider<..>` type. 29 | 30 | 31 | ## Contributing and Copyright 32 | 33 | The project is hosted on [GitHub](https://github.com/AlexKenna/BioProviders) where you can 34 | [report issues](https://github.com/AlexKenna/BioProviders/issues), fork the project and submit pull requests. 35 | 36 | The library is available under the OSI-approved MIT license. For more information see the 37 | [License file](https://github.com/AlexKenna/BioProviders/blob/main/LICENSE.md) in the GitHub repository. -------------------------------------------------------------------------------- /docs/library/GenBankProvider.fsx: -------------------------------------------------------------------------------- 1 | (** 2 | --- 3 | category: Type Providers 4 | categoryindex: 1 5 | index: 1 6 | --- 7 | *) 8 | 9 | (** 10 | 11 | [![Script](../img/badge-script.svg)]({{root}}/{{fsdocs-source-basename}}.fsx)  12 | [![Notebook](../img/badge-notebook.svg)]({{root}}/{{fsdocs-source-basename}}.ipynb) 13 | 14 | # GenBank Type Provider 15 | 16 | This article describes how to use the GenBank Type Provider to remotely access genomic data stored in the 17 | [GenBank](https://www.ncbi.nlm.nih.gov/genbank/) database. This Type Provider collects and parses the genomic data 18 | for a specified organism and generates a static type containing its metadata and sequence. 
19 | 20 | The GenBank Type Provider uses [.NET Bio](https://github.com/dotnetbio/bio) to parse the GenBank data files 21 | and [BioFSharp](https://github.com/CSBiology/BioFSharp) to provide utilities for manipulating genomic sequences. 22 | 23 | ## Loading BioProviders Package 24 | 25 | To load the GenBank Type Provider, a script can use the NuGet syntax to reference the BioProviders package, shown below. 26 | 27 | You can optionally include the BioFSharp package. While it's not required to use the basic BioProviders functions, it can be used to explore the metadata of the provided types, as shown in a later example. 28 | *) 29 | 30 | #r "nuget: BioProviders" 31 | #r "nuget: BioFSharp" 32 | 33 | (** If creating an F# library or application, BioProviders can be added as a package reference. You can use your IDE for this, or use the ```dotnet add package BioProviders``` command in your project folder from the command line. 34 | 35 | BioProviders can then be used in your script or code by using an open command. Opening its dependencies should not be required. (BioFSharp is loaded for future examples.) 36 | *) 37 | 38 | open BioProviders 39 | open BioFSharp 40 | 41 | (** 42 | ## GenBankProvider Example 43 | 44 | The GenBank Type Provider will be demonstrated for [this GenBank assembly](https://www.ncbi.nlm.nih.gov/nuccore/CP012411) 45 | of the *Candidatus Carsonella ruddii* species. To create a typed representation of the assembly, two pieces of information 46 | must be given to the Type Provider: 47 | 48 | * Species name 49 | * GenBank assembly accession 50 | 51 | For this example, the species name is "Candidatus Carsonella ruddii" and the GenBank assembly accession is "GCA_001274515.1". 
52 | To find this information: 53 | 54 | * Visit https://www.ncbi.nlm.nih.gov/datasets/ 55 | * Search for the name of the species 56 | * Select to view all genomes of the species 57 | 58 | You can then select the assembly's GenBank (as well as RefSeq) accession from the list that appears. 59 | 60 | ![Animation of finding a GenBank assembly accession on NCBI.](../img/GenBank_Info.gif "Animation of finding a GenBank assembly accession on NCBI."). 61 | 62 | Passing this information to the Type Provider generates the Assembly Type. The genomic data can then be extracted from the 63 | Assembly Type by invoking the Genome method. This is demonstrated below. 64 | *) 65 | 66 | // Define species name and GenBank assembly accession. 67 | let [] Species = "Candidatus Carsonella ruddii" 68 | let [] Accession = "GCA_001274515.1" 69 | 70 | // Create GenBank assembly type. 71 | type Ruddii = GenBankProvider 72 | 73 | // Extract statically-typed genome data. 74 | let genome = Ruddii.Genome() 75 | 76 | (** 77 | ### Metadata 78 | 79 | Each genome is accompanied by metadata describing the organism and sequence recorded in the assembly. This metadata can 80 | be extracted using the Metadata field of the Genome Type created previously. The Metadata type is largely based on that 81 | provided by [.NET Bio](http://dotnetbio.github.io/Help/html/319bf2e6-4fcf-9f93-586f-fc7ffcf04a83.htm), with modifications 82 | made to be more idiomatic with F#. 83 | 84 | Below is an example of how the raw metadata type can be retrieved and displayed: 85 | 86 | *) 87 | 88 | // Extract the metadata. 89 | let metadata = genome.Metadata 90 | 91 | // Display the metadata type. 92 | printf "%A" metadata 93 | 94 | (*** include-output ***) 95 | 96 | (** 97 | The metadata type consists of many fields, though not all fields of the metadata exist for all assemblies. Therefore, they are provided as option types, on which a match expression can be used. Below are examples of accessing fields from the example assembly.
98 | ✅ Example - Accessing a field that is provided. 99 | *) 100 | 101 | // Print definition if exists. 102 | match metadata.Definition with 103 | | Some definition -> printf "%s" definition 104 | | None -> printf "No definition provided." 105 | 106 | (*** include-output ***) 107 | 108 | (** 109 | ❌ Example - Accessing a field that is not provided. 110 | *) 111 | 112 | // Print database source if exists. 113 | match metadata.DbSource with 114 | | Some dbsource -> printf "%s" dbsource 115 | | None -> printf "No database source provided." 116 | 117 | (*** include-output ***) 118 | 119 | (** 120 | ### Sequence 121 | 122 | The genomic sequence for the organism can be extracted using the Sequence field of the Genome Type created previously. 123 | This field provides a BioFSharp [BioSeq](https://csbiology.github.io/BioFSharp/reference/biofsharp-bioseq.html) containing 124 | a series of [Nucleotides](https://csbiology.github.io/BioFSharp//reference/biofsharp-nucleotides-nucleotide.html). More 125 | can be read about BioFSharp containers [here](https://csbiology.github.io/BioFSharp//BioCollections.html). 126 | 127 | An example of accessing and manipulating the GenBankProvider genomic sequence using BioFSharp is provided below: 128 | *) 129 | 130 | // Extract the BioFSharp BioSeq. 131 | let sequence = genome.Sequence 132 | 133 | // Display the sequence type. 134 | printf "%A" sequence 135 | 136 | (*** include-output ***) 137 | 138 | // Take the complement, then transcribe and translate the coding strand. 139 | sequence 140 | |> BioSeq.complement 141 | |> BioSeq.transcribeCodingStrand 142 | |> BioSeq.translate 0 143 | 144 | (*** include-it ***) 145 | 146 | 147 | (** 148 | ## Wildcard Operators 149 | 150 | Wildcard operators are supported in both the Species and Accession provided to the GenBankProvider. By using asterisks "\*" 151 | at the end of a Species or Accession name, species or accessions starting with the provided pattern will be matched. 
152 | 153 | For example, we can get all *Staphylococcus* species starting with the letter 'c' and assembly accessions starting with 154 | 'GCA_01': 155 | *) 156 | 157 | // Define species name and GenBank assembly accession using wildcards. 158 | let [] SpeciesPattern = "Staphylococcus c*" 159 | let [] AccessionPattern = "GCA_01*" 160 | 161 | // Create GenBank type containing all species matching the species pattern. 162 | type SpeciesCollection = GenBankProvider 163 | 164 | // Select the species types. 165 | type Capitis = SpeciesCollection.``Staphylococcus capitis`` 166 | type Cohnii = SpeciesCollection.``Staphylococcus cohnii`` 167 | 168 | // Select assemblies. 169 | type Assembly1 = Capitis.``GCA_012926605.1`` 170 | type Assembly2 = Capitis.``GCA_015645205.1`` 171 | type Assembly3 = Cohnii.``GCA_013349225.1`` 172 | type Assembly4 = Cohnii.``GCA_014884245.1`` 173 | 174 | // Extract statically-typed genome data. 175 | let data = Assembly1.Genome() 176 | 177 | // Show the assembly's definition. 178 | match data.Metadata.Definition with 179 | | Some definition -> printf "%s" definition 180 | | None -> printf "No definition provided." 181 | 182 | (*** include-output ***) 183 | 184 | (** 185 | The Accession parameter can also be omitted from the GenBankProvider. In this case, all assemblies for the given species will 186 | be matched. For example: 187 | *) 188 | 189 | // Define species name. 190 | let [] SpeciesName = "Staphylococcus lugdunensis" 191 | 192 | // Create GenBank type containing all assemblies for the species. 193 | type Assemblies = GenBankProvider 194 | 195 | // Select assemblies. 196 | type Assembly = Assemblies.``GCA_001546615.1`` 197 | 198 | // Show the assembly's primary accession. 199 | match (Assembly.Genome()).Metadata.Accession with 200 | | Some accession -> match accession.Primary with 201 | | Some primary -> printf "%s" primary 202 | | None -> printf "No primary accession provided." 203 | | None -> printf "No accession provided."
204 | 205 | (*** include-output ***) -------------------------------------------------------------------------------- /docs/library/RefSeqProvider.fsx: -------------------------------------------------------------------------------- 1 | (** 2 | --- 3 | category: Type Providers 4 | categoryindex: 1 5 | index: 2 6 | --- 7 | *) 8 | 9 | (** 10 | 11 | [![Script](../img/badge-script.svg)]({{root}}/{{fsdocs-source-basename}}.fsx)  12 | [![Notebook](../img/badge-notebook.svg)]({{root}}/{{fsdocs-source-basename}}.ipynb) 13 | 14 | # RefSeq Type Provider 15 | 16 | This article describes how to use the RefSeq Type Provider to remotely access genomic data stored in the 17 | [RefSeq](https://www.ncbi.nlm.nih.gov/refseq/) database. This Type Provider collects and parses the genomic data 18 | for a specified organism and generates a static type containing its metadata and sequence. 19 | 20 | The RefSeq Type Provider uses [.NET Bio](https://github.com/dotnetbio/bio) to parse the RefSeq data files 21 | and [BioFSharp](https://github.com/CSBiology/BioFSharp) to provide utilities for manipulating genomic sequences. 22 | 23 | ## Loading BioProviders Package 24 | 25 | To load the RefSeq Type Provider, a script can use the NuGet syntax to reference the BioProviders package, shown below. 26 | 27 | You can optionally include the BioFSharp package. While it's not required to use the basic BioProviders functions, it can be used to explore the metadata of the provided types, as shown in a later example. 28 | *) 29 | 30 | #r "nuget: BioProviders" 31 | #r "nuget: BioFSharp" 32 | 33 | (** If creating an F# library or application, BioProviders can be added as a package reference. You can use your IDE for this, or use the ```dotnet add package BioProviders``` command in your project folder from the command line. 34 | 35 | BioProviders can then be used in your script or code by using an open command. Opening its dependencies should not be required. (BioFSharp is loaded for future examples.)
36 | *) 37 | 38 | open BioProviders 39 | open BioFSharp 40 | 41 | (** 42 | ## RefSeqProvider Example 43 | 44 | The RefSeq Type Provider will be demonstrated for [this RefSeq assembly](https://www.ncbi.nlm.nih.gov/datasets/genome/GCF_001224225.1/) 45 | of the *Staphylococcus borealis* species. To create a typed representation of the assembly, two pieces of information 46 | must be given to the Type Provider: 47 | 48 | * Species name 49 | * RefSeq assembly accession 50 | 51 | For this example, the species name is "Staphylococcus borealis" and the RefSeq assembly accession is "GCF_001224225.1". 52 | To find this information: 53 | 54 | * Visit https://www.ncbi.nlm.nih.gov/datasets/ 55 | * Search for the name of the species 56 | * Select to view all genomes of the species 57 | 58 | You can then select the assembly's RefSeq (as well as GenBank) accession from the list that appears. 59 | 60 | ![Animation of finding a RefSeq assembly accession on NCBI.](../img/RefSeq_Info.gif "Animation of finding a RefSeq assembly accession on NCBI."). 61 | 62 | Passing this information to the Type Provider generates the Assembly Type. The genomic data can then be extracted from the 63 | Assembly Type by invoking the Genome method. This is demonstrated below. 64 | *) 65 | 66 | // Define species name and RefSeq assembly accession. 67 | let [] Species = "Staphylococcus borealis" 68 | let [] Accession = "GCF_001224225.1" 69 | 70 | // Create RefSeq assembly type. 71 | type Borealis = RefSeqProvider 72 | 73 | // Extract statically-typed genome data. 74 | let genome = Borealis.Genome() 75 | 76 | (** 77 | ### Metadata 78 | 79 | Each genome is accompanied by metadata describing the organism and sequence recorded in the assembly. This metadata can 80 | be extracted using the Metadata field of the Genome Type created previously.
The Metadata type is largely based on that 81 | provided by [.NET Bio](http://dotnetbio.github.io/Help/html/319bf2e6-4fcf-9f93-586f-fc7ffcf04a83.htm), with modifications 82 | made to be more idiomatic with F#. 83 | 84 | Below is an example of how the raw metadata type can be retrieved and displayed: 85 | 86 | *) 87 | 88 | // Extract the metadata. 89 | let metadata = genome.Metadata 90 | 91 | // Display the metadata type. 92 | printf "%A" metadata 93 | 94 | (*** include-output ***) 95 | 96 | (** 97 | The metadata type consists of many fields, though not all fields of the metadata exist for all assemblies. Therefore, they are provided as option types, on which a match expression can be used. Below are examples of accessing fields from the example assembly. 98 | ✅ Example - Accessing a field that is provided. 99 | *) 100 | 101 | // Print definition if exists. 102 | match metadata.Definition with 103 | | Some definition -> printf "%s" definition 104 | | None -> printf "No definition provided." 105 | 106 | (*** include-output ***) 107 | 108 | (** 109 | ❌ Example - Accessing a field that is not provided. 110 | *) 111 | 112 | // Print database source if exists. 113 | match metadata.DbSource with 114 | | Some dbsource -> printf "%s" dbsource 115 | | None -> printf "No database source provided." 116 | 117 | (*** include-output ***) 118 | 119 | (** 120 | ### Sequence 121 | 122 | The genomic sequence for the organism can be extracted using the Sequence field of the Genome Type created previously. 123 | This field provides a BioFSharp [BioSeq](https://csbiology.github.io/BioFSharp/reference/biofsharp-bioseq.html) containing 124 | a series of [Nucleotides](https://csbiology.github.io/BioFSharp//reference/biofsharp-nucleotides-nucleotide.html). More 125 | can be read about BioFSharp containers [here](https://csbiology.github.io/BioFSharp//BioCollections.html). 
126 | 127 | An example of accessing and manipulating the RefSeqProvider genomic sequence using BioFSharp is provided below: 128 | *) 129 | 130 | // Extract the BioFSharp BioSeq. 131 | let sequence = genome.Sequence 132 | 133 | // Display the sequence type. 134 | printf "%A" sequence 135 | 136 | (*** include-output ***) 137 | 138 | // Take the complement, then transcribe and translate the coding strand. 139 | sequence 140 | |> BioSeq.complement 141 | |> BioSeq.transcribeCodingStrand 142 | |> BioSeq.translate 0 143 | 144 | (*** include-it ***) 145 | 146 | 147 | (** 148 | ## Wildcard Operators 149 | 150 | Wildcard operators are supported in both the Species and Accession provided to the RefSeqProvider. By using asterisks "\*" 151 | at the end of a Species or Accession name, species or accessions starting with the provided pattern will be matched. 152 | 153 | For example, we can get all *Staphylococcus* species starting with the letter 'c' and assembly accessions starting with 154 | 'GCF_01': 155 | *) 156 | 157 | // Define species name and RefSeq assembly accession using wildcards. 158 | let [] SpeciesPattern = "Staphylococcus c*" 159 | let [] AccessionPattern = "GCF_01*" 160 | 161 | // Create RefSeq type containing all species matching the species pattern. 162 | type SpeciesCollection = RefSeqProvider 163 | 164 | // Select the species types. 165 | type Capitis = SpeciesCollection.``Staphylococcus capitis`` 166 | type Cohnii = SpeciesCollection.``Staphylococcus cohnii`` 167 | 168 | // Select assemblies. 169 | type Assembly1 = Capitis.``GCF_012926605.1`` 170 | type Assembly2 = Capitis.``GCF_012926635.1`` 171 | type Assembly3 = Cohnii.``GCF_013602215.1`` 172 | type Assembly4 = Cohnii.``GCF_013602265.1`` 173 | 174 | // Extract statically-typed genome data. 175 | let data = Assembly1.Genome() 176 | 177 | // Show the assembly's definition.
178 | match data.Metadata.Definition with 179 | | Some definition -> printf "%s" definition 180 | | None -> printf "No definition provided." 181 | 182 | (*** include-output ***) 183 | 184 | (** 185 | The Accession parameter can also be omitted from the RefSeqProvider. In this case, all assemblies for the given species will 186 | be matched. For example: 187 | *) 188 | 189 | // Define species name. 190 | let [] SpeciesName = "Staphylococcus lugdunensis" 191 | 192 | // Create RefSeq type containing all assemblies for the species. 193 | type Assemblies = RefSeqProvider 194 | 195 | // Select assemblies. 196 | type Assembly = Assemblies.``GCF_001546615.1`` 197 | 198 | // Show the assembly's primary accession. 199 | match (Assembly.Genome()).Metadata.Accession with 200 | | Some accession -> match accession.Primary with 201 | | Some primary -> printf "%s" primary 202 | | None -> printf "No primary accession provided." 203 | | None -> printf "No accession provided." 204 | 205 | (*** include-output ***) -------------------------------------------------------------------------------- /global.json: -------------------------------------------------------------------------------- 1 | { 2 | "sdk": { 3 | "version": "6.0.401", 4 | "rollForward": "minor" 5 | } 6 | } -------------------------------------------------------------------------------- /paket.dependencies: -------------------------------------------------------------------------------- 1 | version 6.2.1 2 | framework: net6.0, netstandard2.0 3 | source https://api.nuget.org/v3/index.json 4 | 5 | github fsprojects/FSharp.TypeProviders.SDK src/ProvidedTypes.fsi 6 | github fsprojects/FSharp.TypeProviders.SDK src/ProvidedTypes.fs 7 | 8 | nuget FSharp.Core 6.0.0.0 lowest_matching: true 9 | nuget FluentFTP 34.0.1 10 | nuget NetBio.Core 11 | 12 | git https://github.com/CSBiology/BioFSharp.git nuget Packages: / 13 | nuget BioFSharp 14 | nuget FSharp.Data 15 | 16 | group Build 17 | source https://api.nuget.org/v3/index.json 18 | 
storage: none 19 | 20 | nuget Fake.Testing.Common 5.20.3 21 | nuget Fake.Net.Http 5.20.3 22 | nuget Fake.IO.FileSystem 5.20.3 23 | nuget Fake.Core.CommandLineParsing 5.20.3 24 | nuget Fake.Core.Environment 5.20.3 25 | nuget Fake.Core.FakeVar 5.20.3 26 | nuget Fake.Core.SemVer 5.20.3 27 | nuget Fake.Core.String 5.20.3 28 | nuget Fake.Core.Context 5.20.3 29 | nuget Fake.Core.Trace 5.20.3 30 | nuget Fake.Core.Tasks 5.20.3 31 | nuget Fake.Core.Target 5.20.3 32 | nuget Fake.Core.ReleaseNotes 5.20.3 33 | nuget Fake.DotNet.AssemblyInfoFile 5.20.3 34 | nuget Fake.DotNet.Cli 5.20.3 35 | nuget Fake.DotNet.Testing.NUnit 5.20.3 36 | nuget Fake.DotNet.NuGet 5.20.3 37 | nuget Fake.DotNet.MsBuild 5.20.3 38 | nuget Fake.Tools.Git 5.20.3 39 | nuget Fake.DotNet.Paket 5.20.3 40 | nuget Microsoft.Build 16.9 41 | nuget Microsoft.Build.Framework 16.9 42 | nuget Microsoft.Build.Tasks.Core 16.9 43 | nuget Microsoft.Build.Utilities.Core 16.9 44 | 45 | group Test 46 | frameworks: net6.0 47 | source https://api.nuget.org/v3/index.json 48 | 49 | nuget FSharp.Core 6.0.0.0 50 | nuget Microsoft.NET.Test.Sdk 51 | nuget NUnit 3.13.1 52 | nuget NUnit3TestAdapter 53 | nuget FsUnit 4.0.4 54 | nuget FsCheck 2.15.1 55 | nuget GitHubActionsTestLogger -------------------------------------------------------------------------------- /src/DesignTime/Common.fs: -------------------------------------------------------------------------------- 1 | namespace BioProviders.Common 2 | 3 | open System.Reflection 4 | open System.Text.RegularExpressions 5 | open System.IO 6 | open FluentFTP 7 | 8 | open FSharp.Data 9 | 10 | 11 | // -------------------------------------------------------------------------------------- 12 | // Project Helpers. 13 | // -------------------------------------------------------------------------------------- 14 | 15 | module Helpers = 16 | 17 | /// Parses an optional string. Returns the None option type if the provided string 18 | /// is null or empty. 
Returns the Some option type containing the provided string 19 | /// otherwise. 20 | let parseOptionString (str: string) = 21 | match System.String.IsNullOrEmpty(str) with 22 | | true -> None 23 | | _ -> Some str 24 | 25 | /// Parses an optional list. Returns the None option type if the provided list 26 | /// is empty. Returns the Some option type containing the provided list otherwise. 27 | let parseOptionList (lst: 'a list) = 28 | match lst.Length with 29 | | 0 -> None 30 | | _ -> Some lst 31 | 32 | /// Parses an optional date. Returns the None option type if the provided date 33 | /// is the default DateTime object (i.e., 1/01/0001 12:00:00 AM). Returns the Some 34 | /// option type containing the provided date otherwise. 35 | let parseOptionDate (date: System.DateTime) = 36 | match date = new System.DateTime() with 37 | | true -> None 38 | | _ -> Some date 39 | 40 | 41 | // -------------------------------------------------------------------------------------- 42 | // Generation Context State Types. 43 | // -------------------------------------------------------------------------------------- 44 | 45 | module Context = 46 | 47 | // ---------------------------------------------------------------------------------- 48 | // Base Name Type. 49 | // ---------------------------------------------------------------------------------- 50 | 51 | /// The underlying Name type. Used to determine whether a string follows a regex 52 | /// pattern supported by a Type Provider. 53 | type Name = 54 | | PlainName of string 55 | | RegexName of string 56 | 57 | /// Creates a Name type. If a string is empty, or its last character is '*', the 58 | /// string is a RegexName. Otherwise, the string is a PlainName. 59 | static member Create(name: string) = 60 | match name with 61 | | _ when name.Length = 0 -> RegexName("*") 62 | | _ when name.[name.Length - 1] = '*' -> RegexName name 63 | | _ -> PlainName name 64 | 65 | /// Converts a Name type to a string. 
66 | override this.ToString() = 67 | match this with 68 | | PlainName name -> name 69 | | RegexName name -> name 70 | 71 | 72 | // ---------------------------------------------------------------------------------- 73 | // Database Name Type. 74 | // ---------------------------------------------------------------------------------- 75 | 76 | /// Typed representation of an NCBI Database. NCBI contains two main genome databases 77 | /// GenBank and RefSeq. 78 | type DatabaseName = 79 | | GenBank 80 | | RefSeq 81 | 82 | /// Determines the NCBI FTP server path to the appropriate database. 83 | member this.GetPath() = 84 | match this with 85 | | GenBank -> "/genomes/all/GCA" 86 | | RefSeq -> "/genomes/all/GCF" 87 | 88 | // Returns the name of the database as a string. 89 | override this.ToString() = 90 | match this with 91 | | GenBank -> "GenBank" 92 | | RefSeq -> "RefSeq" 93 | 94 | 95 | // ---------------------------------------------------------------------------------- 96 | // Species Types. 97 | // ---------------------------------------------------------------------------------- 98 | 99 | /// Typed representation of the Species name. 100 | type SpeciesName = 101 | | SpeciesPlainName of string 102 | | SpeciesRegexName of string 103 | 104 | /// Creates a Species Name type. Returns SpeciesRegexName if the species name 105 | /// follows a regex format. Otherwise, returns SpeciesPlainName. 106 | static member Create(species: string) = 107 | match Name.Create species with 108 | | PlainName name -> SpeciesPlainName name 109 | | RegexName name -> SpeciesRegexName name 110 | 111 | /// Converts a Species Name type to a string. For regex names, the final '*' 112 | /// character is replaced by '.*' to follow correct regex formatting. 
113 | override this.ToString() = 114 | match this with 115 | | SpeciesPlainName name -> name 116 | | SpeciesRegexName name -> name.Substring(0, name.Length - 1) + ".*" 117 | 118 | 119 | // ---------------------------------------------------------------------------------- 120 | // Accession Types. 121 | // ---------------------------------------------------------------------------------- 122 | 123 | /// Typed representation of the Accession name. 124 | and AccessionName = 125 | | AccessionPlainName of string 126 | | AccessionRegexName of string 127 | 128 | /// Creates an Accession Name type. Returns AccessionRegexName if the accession 129 | /// name follows a regex format. Otherwise, returns AccessionPlainName. 130 | static member Create(assembly: string) = 131 | match Name.Create assembly with 132 | | PlainName name -> AccessionPlainName name 133 | | RegexName name -> AccessionRegexName name 134 | 135 | /// Converts an Accession Name type to a string. For regex names, the final '*' 136 | /// character is replaced by '.*' to follow correct regex formatting. 137 | override this.ToString() = 138 | match this with 139 | | AccessionPlainName name -> name 140 | | AccessionRegexName name -> name.Substring(0, name.Length - 1) + ".*" 141 | 142 | 143 | // -------------------------------------------------------------------------------------- 144 | // Generation Context. 145 | // -------------------------------------------------------------------------------------- 146 | 147 | /// The context for type generation. 148 | type Context = 149 | { DatabaseName: DatabaseName 150 | SpeciesName: SpeciesName 151 | Accession: AccessionName } 152 | 153 | /// Parses a species and accession string and returns the corresponding Species and 154 | /// Accession types.
155 | static member Parse (species: string) (accession: string) = 156 | let speciesName = species.ToString() |> (fun s -> s.Trim().ToLower()) 157 | let accessionName = accession.ToString() |> (fun s -> s.Trim().ToLower()) 158 | 159 | SpeciesName.Create speciesName, AccessionName.Create accessionName 160 | 161 | 162 | /// Creates the context type given a Database, Species, and Accession. 163 | static member Create (database: DatabaseName) (species: SpeciesName) (accession: AccessionName) = 164 | { DatabaseName = database 165 | SpeciesName = species 166 | Accession = accession } 167 | 168 | 169 | // -------------------------------------------------------------------------------------- 170 | // FTP Access for Type Providers. 171 | // -------------------------------------------------------------------------------------- 172 | 173 | [] 174 | module FTP = 175 | 176 | /// Creates and uses a connection with the NCBI FTP server. 177 | let internal useNCBIConnection (callback) = 178 | let serverBaseLocation = "ftp://ftp.ncbi.nlm.nih.gov" 179 | use client = new FtpClient(serverBaseLocation) 180 | client.Connect() 181 | callback client 182 | 183 | // Checks if a file exists and if so, whether it is older than the remote 184 | // file. 185 | // - If a file doesn't exist, or is older: return to overwrite existing 186 | // file. 187 | // - Otherwise: return to resume existing file (in case it wasn't 188 | // downloaded fully before). 189 | let isNewerFile (localPath: string) (remotePath: string) (connection: FtpClient) = 190 | if (not (File.Exists(localPath))) then 191 | FtpLocalExists.Overwrite 192 | else 193 | match File.GetLastWriteTime(localPath) > connection.GetModifiedTime(remotePath) with 194 | | true -> FtpLocalExists.Append 195 | | _ -> FtpLocalExists.Overwrite 196 | 197 | /// Downloads a file from the NCBI FTP server to the local file system. 
// Remaining member of the FTP module: the download entry point used by the cache.
    let downloadNCBIFile (localPath: string, remotePath: string) =
        let downloadFile (connection: FtpClient) =

            // Check for changed file as well as verification: isNewerFile
            // selects overwrite vs. append for an existing local file.
            connection.DownloadFile(
                localPath,
                remotePath,
                (isNewerFile localPath remotePath connection),
                FtpVerify.Retry
            )

        useNCBIConnection downloadFile


// --------------------------------------------------------------------------------------
// Cache Interface.
// --------------------------------------------------------------------------------------

open Context

type private ICache =
    abstract LoadFile: string -> Stream
    abstract SaveFile: string -> FtpStatus
    abstract Purge: unit -> unit
    abstract PurgeOld: float -> unit


// --------------------------------------------------------------------------------------
// Cache Helpers.
// --------------------------------------------------------------------------------------

module private CacheHelpers =

    /// Directory (under the system temp path) that holds every cached file.
    let private cacheDirectory () =
        Path.Combine(Path.GetTempPath(), "BioProviders")

    /// Maps a remote path to a flat file name inside the cache directory by
    /// replacing the '/' separators with '-'.
    let private getCacheFilePath (path: string) =
        let cacheFileName = path.Replace("/", " ").Trim().Replace(" ", "-")
        Path.Combine(cacheDirectory (), cacheFileName)

    /// Opens the file for reading when it exists; otherwise returns None.
    let private loadFile (path: string) =
        if File.Exists(path) then
            Some(File.OpenRead(path))
        else
            None

    /// Opens the cached copy of a remote path, if one exists.
    let internal loadCacheFile (path: string) =
        let cachePath = getCacheFilePath (path)
        loadFile cachePath

    /// Downloads a remote path from the NCBI FTP server into the cache.
    let internal saveCacheFile (path: string) =
        let cachePath = getCacheFilePath (path)
        FTP.downloadNCBIFile (cachePath, path)

    /// Deletes every file in the cache directory.
    let internal clearCache () =
        let cacheLocation = cacheDirectory ()

        if Directory.Exists cacheLocation then
            Directory.GetFiles cacheLocation
            |> Array.iter (fun file -> File.Delete(file))

    /// Deletes the cached files whose last access time is more than `days`
    /// days in the past.
    let internal clearCacheOld (days: float) =
        let cutOffDate = System.DateTime.Now.AddDays(-days)
        let cacheLocation = cacheDirectory ()

        if Directory.Exists cacheLocation then
            Directory.GetFiles cacheLocation
            |> Array.filter (fun file -> File.GetLastAccessTime(file) < cutOffDate)
            |> Array.iter (fun file -> File.Delete(file))

    // Used to download an assembly list from a remote server. Returns true
    // once the file has been written to the cache; every failure is raised
    // as an exception.
    let private saveAssemblyList (path: string) =
        let cachePath = getCacheFilePath (path)

        try
            // Load the base URL for the location of the assembly lists on a
            // remote server by reading a text file included in the library.
            // The first line of this file should be the full base URL to use.
            // For the default BioProviders source, the URL for the raw .gz
            // files in the BioProviders repository is used.
            let urlFile =
                Path.Combine(Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), "remote.txt")

            let url = (Seq.head (File.ReadLines(urlFile))) + path

            match Http.Request(url).Body with
            | Binary bytes ->
                // Create the BioProviders directory if it doesn't exist yet.
                Directory.CreateDirectory(cacheDirectory ()) |> ignore

                File.WriteAllBytes(cachePath, bytes)
                true
            | _ ->
                failwith (
                    sprintf "Could not download remote file %s to %s - did not recieve binary content." path cachePath
                )
        with ex ->
            failwith (
                sprintf
                    "Could not download remote file %s to %s, because of the following exception: %s"
                    path
                    cachePath
                    ex.Message
            )

    // Used to load a data file referring to the location of assemblies on
    // GenBank's FTP server. If the file does not exist in the cache
    // location, attempts to download it from the remote server (with the
    // above function) before opening it.
    let private loadAssemblyList (path: string) =
        let fullPath = getCacheFilePath path

        if File.Exists(fullPath) || saveAssemblyList path then
            Some(File.OpenRead(fullPath))
        else
            None

    /// Selects the lookup file suffix for a name: its first character when
    /// that is an ASCII letter (lower-cased), otherwise '#'. An empty name
    /// also maps to '#', so the caller reports "not found" instead of
    /// failing with an index error.
    let private getLookupCharacter (name: string) =
        if System.String.IsNullOrEmpty name then
            '#'
        else
            match System.Char.ToLowerInvariant(name.[0]) with
            | c when c >= 'a' && c <= 'z' -> c
            | _ -> '#'

    /// Builds the lookup file name "<database>-<kind>-<character>.txt.gz".
    let private getLookupPath (database: DatabaseName) (kind: string) (name: string) =
        let character = getLookupCharacter name
        $"{(database.ToString()).ToLower()}-{kind}-{character}.txt.gz"

    let private getSpeciesLookupPath (database: DatabaseName) (speciesName: string) =
        getLookupPath database "species" speciesName

    let private getAssemblyLookupPath (database: DatabaseName) (speciesName: string) =
        getLookupPath database "assemblies" speciesName

    /// Splits a comma-separated assembly line into
    /// (accession, assembly name, assembly path). The path is the database
    /// root joined with the third field; the name is its last segment.
    let private parseAssemblyLine (database: DatabaseName) (assemblyLine: string) =
        let assemblyInfo = assemblyLine.Split(',')
        let accession = assemblyInfo.[1]
        let assemblyPath = $"{database.GetPath()}/{assemblyInfo.[2]}"
        let assemblyName = assemblyPath.Split('/') |> Array.last

        (accession, assemblyName, assemblyPath)

    /// Opens a gzip-compressed lookup file as a text reader, raising
    /// InvalidOperationException with `failureMessage` when the file cannot
    /// be loaded. The caller owns (and must dispose) the returned reader.
    let private openLookupReader (path: string) (failureMessage: string) =
        match loadAssemblyList path with
        | None -> invalidOp failureMessage
        | Some data -> new StreamReader(new Compression.GZipStream(data, Compression.CompressionMode.Decompress))

    /// Tests a wildcard pattern against the start of `value`. The lookup
    /// file is chosen from the pattern's first character and the scan below
    /// stops after the first run of hits, so a hit in the middle of a name
    /// must not count.
    let private matchesFromStart (pattern: string) (value: string) =
        Regex.IsMatch(value, "^(?:" + pattern + ")")

    /// Skips lines until `tryParse` accepts one, then collects the
    /// contiguous run of accepted lines. Stops at the first rejected line
    /// after the run or at the end of the stream, whichever comes first.
    /// Raises InvalidOperationException when nothing was accepted.
    let private collectRun (reader: StreamReader) (tryParse: string -> 'T option) (notFoundMessage: string) =
        let rec loop (found: 'T list) =
            if reader.EndOfStream then
                List.rev found
            else
                match tryParse (reader.ReadLine()), found with
                | Some item, _ -> loop (item :: found)
                | None, [] -> loop found
                | None, _ -> List.rev found

        match loop [] with
        | [] -> invalidOp notFoundMessage
        | items -> items

    /// Finds the identifier (first field) of the line whose lower-cased
    /// second field equals `speciesName`.
    let private getSpeciesID (database: DatabaseName) (speciesName: string) =
        let speciesLookupFile = getSpeciesLookupPath database speciesName

        use reader =
            openLookupReader speciesLookupFile $"Could not load {database.ToString()} species lookup file."

        let rec findSpecies () =
            if reader.EndOfStream then
                invalidOp
                    $"The species could not be found. Check the species name is correct and it is a valid {database.ToString()} species."
            else
                let info = reader.ReadLine().Split(',')

                if info.[1].ToLower() = speciesName then
                    info.[0]
                else
                    findSpecies ()

        findSpecies ()

    /// Resolves a single assembly as (accession, assembly name, assembly path).
    let getAssembly (database: DatabaseName) (species: SpeciesName) (accession: AccessionName) =
        let speciesName = species.ToString()
        let accessionName = accession.ToString()
        let speciesID = getSpeciesID database speciesName
        let assemblyLookupFile = getAssemblyLookupPath database speciesName

        use reader =
            openLookupReader assemblyLookupFile $"Could not load {database.ToString()} assembly lookup file."

        let rec findAssembly () =
            if reader.EndOfStream then
                invalidOp
                    $"The assembly could not be found. Check that the accession is correct and it is a valid {database.ToString()} accession."
            else
                let line = reader.ReadLine()
                let info = line.Split(',')

                if info.[0] = speciesID && info.[1].ToLower() = accessionName then
                    parseAssemblyLine database line
                else
                    findAssembly ()

        findAssembly ()

    /// Collects the assemblies of a species whose lower-cased accession
    /// matches `accessionPattern`.
    let getAssemblyCollection
        (database: DatabaseName)
        (assemblyLookupPath: string)
        (speciesID: string)
        (accessionPattern: string)
        =
        use reader =
            openLookupReader assemblyLookupPath $"Could not load {database.ToString()} assembly lookup file."

        let tryParse (line: string) =
            let info = line.Split(',')

            if info.[0] = speciesID && matchesFromStart accessionPattern (info.[1].ToLower()) then
                Some(parseAssemblyLine database line)
            else
                None

        collectRun
            reader
            tryParse
            "No assemblies matching the accession pattern could be found. Check the accession pattern is correct."

    /// Resolves a single species as (species ID, species name, assembly lookup file).
    let getSpecies (database: DatabaseName) (species: SpeciesName) =
        let speciesName = species.ToString()
        let speciesID = getSpeciesID database (speciesName)
        let assemblyLookupFile = getAssemblyLookupPath database (speciesName)

        (speciesID, speciesName, assemblyLookupFile)

    /// Collects the species whose lower-cased name matches `speciesPattern`,
    /// each as (species ID, species name, assembly lookup file).
    let getSpeciesCollection (database: DatabaseName) (speciesPattern: string) =
        let speciesLookupPath = getSpeciesLookupPath database speciesPattern
        let assemblyLookupPath = getAssemblyLookupPath database speciesPattern

        // The file being opened here is the species lookup, so the failure
        // message names it as such.
        use reader =
            openLookupReader speciesLookupPath $"Could not load {database.ToString()} species lookup file."

        let tryParse (line: string) =
            let info = line.Split(',')

            if matchesFromStart speciesPattern (info.[1].ToLower()) then
                Some(info.[0], info.[1], assemblyLookupPath)
            else
                None

        collectRun
            reader
            tryParse
            "No species matching the pattern could be found. Check the species pattern is correct."


// --------------------------------------------------------------------------------------
// Cache Implementation.
// --------------------------------------------------------------------------------------

//open CacheHelpers.General
open CacheHelpers

type private Cache() =
    interface ICache with
        member __.SaveFile(path: string) = saveCacheFile path

        member __.Purge() = clearCache ()

        member __.PurgeOld(days) = clearCacheOld days

        /// Returns the cached file, downloading it first when it is not in
        /// the cache yet. The cache is re-read exactly once after a
        /// successful download, so a file that is still missing produces an
        /// error rather than another download attempt.
        member this.LoadFile(path: string) =
            let tryLoad () =
                loadCacheFile path |> Option.map (fun data -> data :> Stream)

            match tryLoad () with
            | Some stream -> stream
            | None ->
                let saved = (this :> ICache).SaveFile(path) = FtpStatus.Success

                match (if saved then tryLoad () else None) with
                | Some stream -> stream
                | None -> failwithf "Unable to load or save the file %s." path


// --------------------------------------------------------------------------------------
// Cache Access.
504 | // -------------------------------------------------------------------------------------- 505 | 506 | module CacheAccess = 507 | 508 | let loadFile (path: string) = (new Cache() :> ICache).LoadFile path 509 | 510 | let getAssembly (database: DatabaseName) (species: SpeciesName) (accession: AccessionName) = 511 | match database with 512 | | RefSeq _ -> CacheHelpers.getAssembly database species accession //CacheHelpers.RefSeq.getAssembly database species accession //failwith "RefSeq is not currently supported." 513 | | GenBank _ -> CacheHelpers.getAssembly database species accession //CacheHelpers.GenBank.getAssembly database species accession 514 | 515 | let getAssemblies 516 | (database: DatabaseName) 517 | (assemblyLookupPath: string) 518 | (speciesID: string) 519 | (accessionPattern: string) 520 | = 521 | match database with 522 | | RefSeq _ -> CacheHelpers.getAssemblyCollection database assemblyLookupPath speciesID accessionPattern //CacheHelpers.RefSeq.getAssemblies database assemblyLookupPath speciesID accessionPattern //failwith "RefSeq is not currently supported." 523 | | GenBank _ -> CacheHelpers.getAssemblyCollection database assemblyLookupPath speciesID accessionPattern //CacheHelpers.GenBank.getAssemblies database assemblyLookupPath speciesID accessionPattern 524 | 525 | let getSpecies (database: DatabaseName) (species: SpeciesName) = 526 | match database with 527 | | RefSeq _ -> CacheHelpers.getSpecies database species //CacheHelpers.RefSeq.getSpecies species //failwith "RefSeq is not currently supported." 528 | | GenBank _ -> CacheHelpers.getSpecies database species //CacheHelpers.GenBank.getSpecies species 529 | 530 | let getSpeciesCollection (database: DatabaseName) (speciesPattern: string) = 531 | match database, speciesPattern with 532 | | RefSeq _, ".*" -> failwith "A species pattern is required." 
533 | | RefSeq _, _ -> CacheHelpers.getSpeciesCollection database speciesPattern //CacheHelpers.RefSeq.getSpeciesCollection speciesPattern //failwith "RefSeq is not currently supported." 534 | | GenBank _, ".*" -> failwith "A species pattern is required." 535 | | GenBank _, _ -> CacheHelpers.getSpeciesCollection database speciesPattern //CacheHelpers.GenBank.getSpeciesCollection speciesPattern 536 | 537 | let deleteOldFiles = (new Cache() :> ICache).PurgeOld(90) 538 | -------------------------------------------------------------------------------- /src/DesignTime/DesignTime.fs: -------------------------------------------------------------------------------- 1 | namespace BioProviders.DesignTime 2 | 3 | open FSharp.Core.CompilerServices 4 | open System.Reflection 5 | open ProviderImplementation.ProvidedTypes 6 | 7 | open BioProviders.Common.Context 8 | open BioProviders.DesignTime.TypeGenerator 9 | 10 | // GenBank Type Provider. 11 | [] 12 | type public GenBankProvider(config: TypeProviderConfig) as this = 13 | 14 | // Inherit basic Type Provider functionality and type construction. 
15 | inherit 16 | TypeProviderForNamespaces( 17 | config, 18 | assemblyReplacementMap = [ ("BioProviders.DesignTime", "BioProviders") ], 19 | addDefaultProbingLocation = true 20 | ) 21 | 22 | // Define structure of the Type Provider 23 | let namespaceName = "BioProviders" 24 | let thisAssembly = Assembly.GetExecutingAssembly() 25 | 26 | let assemblyProvidedType = 27 | ProvidedTypeDefinition(thisAssembly, namespaceName, "GenBankProvider", Some typeof) 28 | 29 | // Instantiation function for parameterised Assembly Type Provider 30 | let buildAssemblyType (typeName: string) (args: obj[]) = 31 | 32 | // Extract parameters 33 | let species = args.[0] :?> string 34 | let assembly = args.[1] :?> string 35 | 36 | // Define the assembly type 37 | let providedType = 38 | ProvidedTypeDefinition(thisAssembly, namespaceName, typeName, Some typeof) 39 | 40 | // Generate types 41 | (species, assembly) 42 | ||> Context.Parse 43 | ||> Context.Create GenBank 44 | |> createType providedType 45 | 46 | // Define static parameters for the Type Provider 47 | let assemblyParameters = 48 | [ ProvidedStaticParameter("Species", typeof, parameterDefaultValue = "") 49 | ProvidedStaticParameter("Accession", typeof, parameterDefaultValue = "") ] 50 | 51 | do assemblyProvidedType.DefineStaticParameters(assemblyParameters, buildAssemblyType) 52 | 53 | // Add XML documentation to the Type Provider 54 | let assemblyHelpText = 55 | """Typed representation of the NCBI FTP server, for GenBank data. 56 | The name of the species whose genome is being accessed (e.g. "Staphylococcus borealis"). Defaults to "". 57 | The accession of the genome assembly being accessed (e.g. "GCA_003042555.1"). Defaults to "". 58 | Both the and parameters can take in a wildcard character '*' to match more than one species or accession. 
This can be used after any number of characters, but is only valid at the end of the string.""" 59 | 60 | do assemblyProvidedType.AddXmlDoc(assemblyHelpText) 61 | 62 | // Register the main type with the Type Provider 63 | do this.AddNamespace(namespaceName, [ assemblyProvidedType ]) 64 | 65 | // RefSeq Type Provider. 66 | [] 67 | type public RefSeqProvider(config: TypeProviderConfig) as this = 68 | 69 | // Inherit basic Type Provider functionality and type construction. 70 | inherit 71 | TypeProviderForNamespaces( 72 | config, 73 | assemblyReplacementMap = [ ("BioProviders.DesignTime", "BioProviders") ], 74 | addDefaultProbingLocation = true 75 | ) 76 | 77 | // Define structure of the Type Provider 78 | let namespaceName = "BioProviders" 79 | let thisAssembly = Assembly.GetExecutingAssembly() 80 | 81 | let assemblyProvidedType = 82 | ProvidedTypeDefinition(thisAssembly, namespaceName, "RefSeqProvider", Some typeof) 83 | 84 | // Instantiation function for parameterised Assembly Type Provider 85 | let buildAssemblyType (typeName: string) (args: obj[]) = 86 | 87 | // Extract parameters 88 | let species = args.[0] :?> string 89 | let assembly = args.[1] :?> string 90 | 91 | // Define the assembly type 92 | let providedType = 93 | ProvidedTypeDefinition(thisAssembly, namespaceName, typeName, Some typeof) 94 | 95 | // Generate types 96 | (species, assembly) 97 | ||> Context.Parse 98 | ||> Context.Create RefSeq 99 | |> createType providedType 100 | 101 | // Define static parameters for the Type Provider 102 | let assemblyParameters = 103 | [ ProvidedStaticParameter("Species", typeof, parameterDefaultValue = "") 104 | ProvidedStaticParameter("Accession", typeof, parameterDefaultValue = "") ] 105 | 106 | do assemblyProvidedType.DefineStaticParameters(assemblyParameters, buildAssemblyType) 107 | 108 | // Add XML documentation to the Type Provider 109 | let assemblyHelpText = 110 | """Typed representation of the NCBI FTP server, for RefSeq data. 
111 | The name of the species whose genome is being accessed (e.g. "Staphylococcus borealis"). Defaults to "". 112 | The accession of the genome assembly being accessed (e.g. "GCF_001224225.1"). Defaults to "". 113 | Both the and parameters can take in a wildcard character '*' to match more than one species or accession. This can be used after any number of characters, but is only valid at the end of the string.""" 114 | 115 | do assemblyProvidedType.AddXmlDoc(assemblyHelpText) 116 | 117 | // Register the main type with the Type Provider 118 | do this.AddNamespace(namespaceName, [ assemblyProvidedType ]) 119 | -------------------------------------------------------------------------------- /src/DesignTime/DesignTime.fsproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | false 5 | netstandard2.0 6 | false 7 | IS_DESIGNTIME 8 | true 9 | BioProviders.DesignTime 10 | true 11 | 12 | 13 | 14 | 15 | True 16 | paket-files/ProvidedTypes.fsi 17 | 18 | 19 | True 20 | paket-files/ProvidedTypes.fs 21 | 22 | 23 | PreserveNewest 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /src/DesignTime/TypeGenerator.fs: -------------------------------------------------------------------------------- 1 | namespace BioProviders.DesignTime 2 | 3 | open ProviderImplementation.ProvidedTypes 4 | open BioProviders.Common.Context 5 | open BioProviders 6 | 7 | // -------------------------------------------------------------------------------------- 8 | // Type Generation. 9 | // -------------------------------------------------------------------------------------- 10 | 11 | module internal TypeGenerator = 12 | 13 | /// 14 | /// Creates a typed representation of a GenBank Flat File Sequence. 15 | /// 16 | let createGenomicGenBankFlatFileSequence () = 17 | 18 | // Initialise the Genomic GBFF Sequence type. 
19 | let genomicGBFFSequence = 20 | ProvidedProperty( 21 | propertyName = "Sequence", 22 | propertyType = typeof>, 23 | getterCode = fun args -> <@@ (%%args.[0]: GenBankFlatFile.GenBankFlatFile).Sequence @@> 24 | ) 25 | 26 | let genomicGBFFSequenceHelpText = 27 | """Typed representation of the Sequence of a Genomic GenBank Flat File.""" 28 | 29 | genomicGBFFSequence.AddXmlDocDelayed(fun () -> genomicGBFFSequenceHelpText) 30 | 31 | genomicGBFFSequence 32 | 33 | 34 | /// 35 | /// Creates a typed representation of a GenBank Flat File Metadata. 36 | /// 37 | let createGenomicGenBankFlatFileMetadata () = 38 | 39 | // Initialise the Genomic GBFF Metadata type. 40 | let genomicGBFFMetadata = 41 | ProvidedProperty( 42 | propertyName = "Metadata", 43 | propertyType = typeof, 44 | getterCode = fun args -> <@@ (%%args.[0]: GenBankFlatFile.GenBankFlatFile).Metadata @@> 45 | ) 46 | 47 | let genomicGBFFMetadataHelpText = 48 | """Typed representation of the Metadata of a Genomic GenBank Flat File.""" 49 | 50 | genomicGBFFMetadata.AddXmlDocDelayed(fun () -> genomicGBFFMetadataHelpText) 51 | 52 | genomicGBFFMetadata 53 | 54 | 55 | /// 56 | /// Creates a typed representation of a Genomic GenBank Flat File. 57 | /// 58 | /// The path to the GenBank Flat File. 59 | let createGenomicGenBankFlatFile (path: string) = 60 | 61 | // Initialise the Genomic GBFF type. 62 | let genomicGBFF = 63 | ProvidedTypeDefinition( 64 | className = "Genome", 65 | baseType = Some(typeof), 66 | hideObjectMethods = true 67 | ) 68 | 69 | let genomicGBFFHelpText = 70 | """Typed representation of an Assembly's Genomic GenBank Flat File.""" 71 | 72 | genomicGBFF.AddXmlDocDelayed(fun () -> genomicGBFFHelpText) 73 | 74 | // Create and add constructor to the Genomic GBFF type. 
75 | let genomicGBFFConstructor = 76 | ProvidedConstructor( 77 | parameters = [], 78 | invokeCode = (fun _ -> <@@ GenBankFlatFile.createGenBankFlatFile path @@>) 79 | ) 80 | 81 | let genomicGBFFConstructorHelpText = 82 | """Generic constructor to initialise the Genomic GenBank Flat File.""" 83 | 84 | genomicGBFFConstructor.AddXmlDocDelayed(fun () -> genomicGBFFConstructorHelpText) 85 | genomicGBFF.AddMemberDelayed(fun () -> genomicGBFFConstructor) 86 | 87 | // Create and add Genomic GBFF Sequence. 88 | let genomicGBFFSequence = createGenomicGenBankFlatFileSequence () 89 | genomicGBFF.AddMemberDelayed(fun () -> genomicGBFFSequence) 90 | 91 | // Create and add Genomic GBFF Metadata. 92 | let genomicGBFFMetadata = createGenomicGenBankFlatFileMetadata () 93 | genomicGBFF.AddMemberDelayed(fun () -> genomicGBFFMetadata) 94 | genomicGBFF 95 | 96 | 97 | /// 98 | /// Creates a typed representation of a GenBank Assembly. 99 | /// 100 | /// The assembly type to be constructed. 101 | /// The runtime representation of the assembly to be created. 102 | let createAssembly (providedType: ProvidedTypeDefinition) (assembly: GenBankAssembly) = 103 | 104 | let genomicGBFFPath = assembly.GenBankFlatFilePath 105 | 106 | // Add the genomic GenBank flat file to the assembly type. 107 | let genomicGBFF () = 108 | createGenomicGenBankFlatFile genomicGBFFPath 109 | 110 | providedType.AddMemberDelayed genomicGBFF 111 | 112 | // Add documentation to assembly type and return. 113 | let helpText = """Typed representation of a GenBank assembly.""" 114 | providedType.AddXmlDocDelayed(fun () -> helpText) 115 | providedType 116 | 117 | 118 | /// 119 | /// Creates a typed representation of a GenBank Species. 120 | /// 121 | /// The species type to be constructed. 122 | /// The runtime representation of the species to be created. 123 | /// The accession pattern for the species assemblies. 
124 | let createSpecies (providedType: ProvidedTypeDefinition) (species: GenBankSpecies) (accessionPattern: string) = 125 | 126 | // Create the assembly types for the species. 127 | let assemblyTypes () = 128 | species.GetAssemblies accessionPattern 129 | |> List.map (fun assembly -> 130 | let assemblyType = 131 | ProvidedTypeDefinition(assembly.Accession, Some typeof, hideObjectMethods = true) 132 | 133 | createAssembly assemblyType assembly) 134 | 135 | // Add the assembly types to the species type. 136 | providedType.AddMembersDelayed assemblyTypes 137 | 138 | // Add documentation to species type and return. 139 | let helpText = 140 | """Typed representation of a collection of GenBank assemblies for a species.""" 141 | 142 | providedType.AddXmlDocDelayed(fun () -> helpText) 143 | providedType 144 | 145 | 146 | /// 147 | /// Creates a typed representation of a GenBank Taxon. 148 | /// 149 | /// The taxon type to be constructed. 150 | /// The runtime representation of the taxon to be created. 151 | /// The species name pattern for species to be added to the taxon. 152 | /// The accession pattern for the species assemblies. 153 | let createTaxon 154 | (providedType: ProvidedTypeDefinition) 155 | (taxon: GenBankTaxon) 156 | (speciesPattern: string) 157 | (accessionPattern: string) 158 | = 159 | 160 | // Create the species types for the taxon. 161 | let speciesTypes () = 162 | taxon.GetSpecies speciesPattern 163 | |> List.map (fun species -> 164 | let speciesType = 165 | ProvidedTypeDefinition(species.SpeciesName, Some typeof, hideObjectMethods = true) 166 | 167 | createSpecies speciesType species accessionPattern) 168 | 169 | // Add the species types to the taxon type. 170 | providedType.AddMembersDelayed speciesTypes 171 | 172 | // Add documentation to taxon type and return. 
173 | let helpText = 174 | """Typed representation of a collection of GenBank species.""" 175 | 176 | providedType.AddXmlDocDelayed(fun () -> helpText) 177 | providedType 178 | 179 | 180 | /// 181 | /// Construct the appropriate provided type based on the context of the Type Provider. 182 | /// 183 | /// The Type Provider type being constructed. 184 | /// The context of the Type Provider. 185 | let createType (providedType: ProvidedTypeDefinition) (context: Context) = 186 | match context.SpeciesName, context.Accession with 187 | | SpeciesRegexName _, _ -> 188 | createTaxon 189 | providedType 190 | (new GenBankTaxon(context)) 191 | (context.SpeciesName.ToString()) 192 | (context.Accession.ToString()) 193 | | SpeciesPlainName _, AccessionRegexName _ -> 194 | createSpecies providedType (new GenBankSpecies(context)) (context.Accession.ToString()) 195 | | SpeciesPlainName _, AccessionPlainName _ -> createAssembly providedType (new GenBankAssembly(context)) 196 | -------------------------------------------------------------------------------- /src/DesignTime/paket.references: -------------------------------------------------------------------------------- 1 | File:ProvidedTypes.fsi 2 | File:ProvidedTypes.fs 3 | 4 | FSharp.Core 5 | FluentFTP 6 | NetBio.Core 7 | BioFSharp 8 | FSharp.Data -------------------------------------------------------------------------------- /src/DesignTime/remote.txt: -------------------------------------------------------------------------------- 1 | https://github.com/fsprojects/BioProviders/raw/main/build/data/ -------------------------------------------------------------------------------- /src/RunTime/GenBankAssembly.fs: -------------------------------------------------------------------------------- 1 | namespace BioProviders 2 | 3 | open BioProviders.Common 4 | open BioProviders.Common.Context 5 | 6 | // -------------------------------------------------------------------------------------- 7 | // GenBank Assembly Representation. 
8 | // -------------------------------------------------------------------------------------- 9 | 10 | type GenBankAssembly(accession: string, assemblyName: string, assemblyPath: string) = 11 | 12 | member __.Accession = accession 13 | member __.AssemblyName = assemblyName 14 | member __.AssemblyPath = assemblyPath 15 | member __.GenBankFlatFilePath = $"{assemblyPath}/{assemblyName}_genomic.gbff.gz" 16 | 17 | new(context: Context) = 18 | let database = context.DatabaseName 19 | let species = context.SpeciesName 20 | let accession = context.Accession 21 | 22 | let (accessionNumber, assemblyName, assemblyPath) = 23 | CacheAccess.getAssembly database species accession 24 | 25 | GenBankAssembly(accessionNumber, assemblyName, assemblyPath) 26 | -------------------------------------------------------------------------------- /src/RunTime/GenBankFlatFile.fs: -------------------------------------------------------------------------------- 1 | namespace BioProviders 2 | 3 | open System.IO 4 | open BioProviders.Common 5 | 6 | // -------------------------------------------------------------------------------------- 7 | // GenBank Flat File Representation. 8 | // -------------------------------------------------------------------------------------- 9 | 10 | module GenBankFlatFile = 11 | 12 | /// 13 | /// GenBank Flat File representation. 14 | /// It consists of two members: 15 | /// 16 | /// 17 | /// Metadata - The metadata for the current sequece, as a GenBankMetadata type. 18 | /// 19 | /// 20 | /// Sequence - The sequence itself as a BioFSharp BioSeq type. 21 | /// 22 | /// 23 | /// 24 | type GenBankFlatFile = 25 | { Metadata: Metadata.Metadata 26 | Sequence: BioFSharp.BioSeq.BioSeq } 27 | 28 | /// 29 | /// Basic constructor for GenBankFlatFile type. Creates a representation based upon an NCBI assembly file. 30 | /// 31 | /// The path to the assembly file. This will usually reside in the cache folder. 
32 | let createGenBankFlatFile (path: string) = 33 | 34 | // Delete files that are too old. 35 | // Ideally, we'd have this in a different place, rather than accessed 36 | // any time we want to create a new flat file. 37 | CacheAccess.deleteOldFiles 38 | 39 | // Create DotNet Bio ISequence for the GenBank Flat File. 40 | let sequence = 41 | CacheAccess.loadFile path 42 | |> (fun stream -> new Compression.GZipStream(stream, Compression.CompressionMode.Decompress)) 43 | |> (new Bio.IO.GenBank.GenBankParser()).Parse 44 | |> Seq.cast 45 | |> Seq.head 46 | 47 | let metadata = sequence.Metadata.Item("GenBank") :?> Bio.IO.GenBank.GenBankMetadata 48 | 49 | // Added by Samuel Smith n7581769. 50 | // Change the last access date for the requested file. 51 | if (File.Exists(path)) then 52 | File.SetLastAccessTime(path, System.DateTime.Now) 53 | 54 | // Create GenBank Flat File Type. 55 | { Metadata = Metadata.createMetadata metadata 56 | Sequence = Sequence.createSequence sequence } 57 | -------------------------------------------------------------------------------- /src/RunTime/GenBankMetadata.fs: -------------------------------------------------------------------------------- 1 | namespace BioProviders 2 | 3 | open BioProviders.Common 4 | 5 | // -------------------------------------------------------------------------------------- 6 | // GenBank Flat File Metadata Representation. 7 | // -------------------------------------------------------------------------------------- 8 | 9 | module Metadata = 10 | 11 | /// 12 | /// Identifier assigned to each GenBank sequence record. 13 | /// 14 | /// Records can have more than one accession assigned to them. The primary accession number is the newest identifier for the sequence record, and the secondary accession numbers are any of those that were previously assigned to it. A sequence record can have more than one secondary accession. 
// Accession numbers and DBLINK cross-references.
    type Accession =
        { Primary: string option
          Secondary: string list option }

    /// Converts a .NET Bio accession into an Accession record; a null input gives None.
    let private createAccession (accession: Bio.IO.GenBank.GenBankAccession) : Accession option =
        accession
        |> Option.ofObj
        |> Option.map (fun source ->
            { Primary = source.Primary |> Helpers.parseOptionString
              Secondary = source.Secondary |> Seq.toList |> Helpers.parseOptionList })


    /// <summary>
    /// Specifies the type of DBLink.
    /// </summary>
    type LinkType =
        | Project
        | TraceAssemblyArchive
        | BioProject

    /// Maps a .NET Bio cross-reference type onto LinkType. The None value gives None;
    /// any value not listed here raises an argument exception.
    let private createLinkType (crossReferenceType: Bio.IO.GenBank.CrossReferenceType) : LinkType option =
        match crossReferenceType with
        | Bio.IO.GenBank.CrossReferenceType.Project -> Some LinkType.Project
        | Bio.IO.GenBank.CrossReferenceType.TraceAssemblyArchive -> Some LinkType.TraceAssemblyArchive
        | Bio.IO.GenBank.CrossReferenceType.BioProject -> Some LinkType.BioProject
        | Bio.IO.GenBank.CrossReferenceType.None -> None
        | _ -> invalidArg "LinkType" "An invalid LinkType has been encountered."


    /// <summary>
    /// Cross-references to resources that support the existence of a sequence
    /// record, such as the Project Database and the NCBI Trace Assembly
    /// Archive.
    /// </summary>
    type DbLink =
        { Numbers: string list option
          Type: LinkType option }

    /// Converts one .NET Bio cross-reference link into a DbLink record.
    let private createDbLink (crossReferenceLink: Bio.IO.GenBank.CrossReferenceLink) : DbLink =
        let numbers = crossReferenceLink.Numbers |> Seq.toList |> Helpers.parseOptionList
        let linkType = createLinkType crossReferenceLink.Type

        { Numbers = numbers; Type = linkType }

    /// Converts every link in the list and hands the result to Helpers.parseOptionList.
    let private createDbLinks (dbLinks: Bio.IO.GenBank.CrossReferenceLink list) =
        List.map createDbLink dbLinks |> Helpers.parseOptionList


    ///
    /// Specifies which family a sequence belongs to.
///
    type DivisionCode =
        /// Primate sequences
        | PRI
        /// Rodent sequences
        | ROD
        /// Other mammalian sequences
        | MAM
        /// Other vertebrate sequences
        | VRT
        /// Invertebrate sequences
        | INV
        /// Plant and Fungal sequences
        | PLN
        /// Bacterial sequences
        | BCT
        /// Viral sequences
        | VRL
        /// Phage sequences
        | PHG
        /// Synthetic and chimeric sequences
        | SYN
        /// Unannotated sequences
        | UNA
        /// Expressed Sequence Tags
        | EST
        /// Patent sequences
        | PAT
        /// Sequence Tagged Sites
        | STS
        /// Genome Survey Sequences
        | GSS
        /// High Throughput Genomic sequences
        | HTG
        /// Unfinished High-Throughput cDNA sequencing
        | HTC
        /// Environmental samples
        | ENV
        /// Constructed (for contig assembly)
        | CON

    /// Maps a .NET Bio division code onto DivisionCode. The None value gives None;
    /// any value not listed here raises an argument exception.
    let private createDivisionCode (code: Bio.IO.GenBank.SequenceDivisionCode) : DivisionCode option =
        match code with
        | Bio.IO.GenBank.SequenceDivisionCode.PRI -> Some DivisionCode.PRI
        | Bio.IO.GenBank.SequenceDivisionCode.ROD -> Some DivisionCode.ROD
        | Bio.IO.GenBank.SequenceDivisionCode.MAM -> Some DivisionCode.MAM
        | Bio.IO.GenBank.SequenceDivisionCode.VRT -> Some DivisionCode.VRT
        | Bio.IO.GenBank.SequenceDivisionCode.INV -> Some DivisionCode.INV
        | Bio.IO.GenBank.SequenceDivisionCode.PLN -> Some DivisionCode.PLN
        | Bio.IO.GenBank.SequenceDivisionCode.BCT -> Some DivisionCode.BCT
        | Bio.IO.GenBank.SequenceDivisionCode.VRL -> Some DivisionCode.VRL
        | Bio.IO.GenBank.SequenceDivisionCode.PHG -> Some DivisionCode.PHG
        | Bio.IO.GenBank.SequenceDivisionCode.SYN -> Some DivisionCode.SYN
        | Bio.IO.GenBank.SequenceDivisionCode.UNA -> Some DivisionCode.UNA
        | Bio.IO.GenBank.SequenceDivisionCode.EST -> Some DivisionCode.EST
        | Bio.IO.GenBank.SequenceDivisionCode.PAT -> Some DivisionCode.PAT
        | Bio.IO.GenBank.SequenceDivisionCode.STS -> Some DivisionCode.STS
        | Bio.IO.GenBank.SequenceDivisionCode.GSS -> Some DivisionCode.GSS
        | Bio.IO.GenBank.SequenceDivisionCode.HTG -> Some DivisionCode.HTG
        | Bio.IO.GenBank.SequenceDivisionCode.HTC -> Some DivisionCode.HTC
        | Bio.IO.GenBank.SequenceDivisionCode.ENV -> Some DivisionCode.ENV
        | Bio.IO.GenBank.SequenceDivisionCode.CON -> Some DivisionCode.CON
        | Bio.IO.GenBank.SequenceDivisionCode.None -> None
        | _ -> invalidArg "DivisionCode" "An invalid DivisionCode has been encountered."


    /// <summary>
    /// Specifies the type of biological sequence.
    /// </summary>
    type MoleculeType =
        /// No valid type (but set in metadata)
        | Invalid
        /// Nucleic acid
        | NA
        /// Deoxyribonucleic acid (DNA)
        | DNA
        /// Ribonucleic acid (RNA)
        | RNA
        /// Transfer RNA
        | TRNA
        /// Ribosomal RNA
        | RRNA
        /// Messenger RNA
        | MRNA
        /// Alternate name for SnRNA
        | URNA
        /// Small nuclear RNA
        | SnRNA
        /// Small nucleolar RNA
        | SnoRNA
        /// Protein
        | Protein

    /// Maps a .NET Bio molecule type onto MoleculeType. The .NET Bio Invalid value gives
    /// None (the Invalid case above is never produced here); any value not listed raises
    /// an argument exception.
    let private createMoleculeType (molecule: Bio.IO.GenBank.MoleculeType) : MoleculeType option =
        match molecule with
        | Bio.IO.GenBank.MoleculeType.NA -> Some MoleculeType.NA
        | Bio.IO.GenBank.MoleculeType.DNA -> Some MoleculeType.DNA
        | Bio.IO.GenBank.MoleculeType.RNA -> Some MoleculeType.RNA
        | Bio.IO.GenBank.MoleculeType.tRNA -> Some MoleculeType.TRNA
        | Bio.IO.GenBank.MoleculeType.rRNA -> Some MoleculeType.RRNA
        | Bio.IO.GenBank.MoleculeType.mRNA -> Some MoleculeType.MRNA
        | Bio.IO.GenBank.MoleculeType.uRNA -> Some MoleculeType.URNA
        | Bio.IO.GenBank.MoleculeType.snRNA -> Some MoleculeType.SnRNA
        | Bio.IO.GenBank.MoleculeType.snoRNA -> Some MoleculeType.SnoRNA
        | Bio.IO.GenBank.MoleculeType.Protein -> Some MoleculeType.Protein
        | Bio.IO.GenBank.MoleculeType.Invalid -> None
        | _ -> invalidArg "MoleculeType" "An invalid MoleculeType has been encountered."


    /// <summary>
    /// Specifies whether the sequence occurs as a single stranded, double stranded
    /// or mixed stranded.
    /// </summary>
    type StrandType =
        | Single
        | Double
        | Mixed

    /// Maps a .NET Bio strand type onto StrandType. The None value gives None;
    /// any value not listed here raises an argument exception.
    let private createStrandType (strand: Bio.IO.GenBank.SequenceStrandType) : StrandType option =
        match strand with
        | Bio.IO.GenBank.SequenceStrandType.Single -> Some StrandType.Single
        | Bio.IO.GenBank.SequenceStrandType.Double -> Some StrandType.Double
        | Bio.IO.GenBank.SequenceStrandType.Mixed -> Some StrandType.Mixed
        | Bio.IO.GenBank.SequenceStrandType.None -> None
        | _ -> invalidArg "Strand" "An invalid Strand has been encountered."


    /// <summary>
    /// Specifies whether the strand is linear or circular.
    /// </summary>
    type StrandTopology =
        | Linear
        | Circular

    /// Maps a .NET Bio strand topology onto StrandTopology. The None value gives None;
    /// any value not listed here raises an argument exception.
    let private createStrandTopology (topology: Bio.IO.GenBank.SequenceStrandTopology) : StrandTopology option =
        match topology with
        | Bio.IO.GenBank.SequenceStrandTopology.Linear -> Some StrandTopology.Linear
        | Bio.IO.GenBank.SequenceStrandTopology.Circular -> Some StrandTopology.Circular
        | Bio.IO.GenBank.SequenceStrandTopology.None -> None
        | _ -> invalidArg "StrandTopology" "An invalid StrandTopology has been encountered."


    ///
    /// Short mnemonic name for the entry, chosen to suggest the sequence's
    /// definition.
///
    type Locus =
        { Date: System.DateTime option
          DivisionCode: DivisionCode option
          MoleculeType: MoleculeType option
          Name: string option
          SequenceLength: int
          SequenceType: string option
          Strand: StrandType option
          StrandTopology: StrandTopology option }

    /// Converts .NET Bio locus information into a Locus record; a null input gives None.
    let private createLocus (locus: Bio.IO.GenBank.GenBankLocusInfo) : Locus option =
        if isNull locus then
            None
        else
            Some
                { Date = Helpers.parseOptionDate locus.Date
                  DivisionCode = createDivisionCode locus.DivisionCode
                  MoleculeType = createMoleculeType locus.MoleculeType
                  Name = Helpers.parseOptionString locus.Name
                  SequenceLength = locus.SequenceLength
                  SequenceType = Helpers.parseOptionString locus.SequenceType
                  Strand = createStrandType locus.Strand
                  StrandTopology = createStrandTopology locus.StrandTopology }


    ///
    /// Citations for all articles containing data reported in this sequence.
    /// Citations in PubMed that do not fall within Medline's scope will have only a
    /// PUBMED identifier. Similarly, citations that *are* in Medline's scope but
    /// which have not yet been assigned Medline UIs will have only a PUBMED
    /// identifier. If a citation is present in both the PubMed and Medline
    /// databases, both a MEDLINE and a PUBMED line will be present.
///
    type Reference =
        { Authors: string option
          Consortiums: string option
          Journal: string option
          Location: string option
          Medline: string option
          Number: int
          PubMed: string option
          Remarks: string option
          Title: string option }

    /// Converts one .NET Bio citation into a Reference record.
    let private createReference (reference: Bio.IO.GenBank.CitationReference) : Reference =
        let text value = Helpers.parseOptionString value

        { Authors = text reference.Authors
          Consortiums = text reference.Consortiums
          Journal = text reference.Journal
          Location = text reference.Location
          Medline = text reference.Medline
          Number = reference.Number
          PubMed = text reference.PubMed
          Remarks = text reference.Remarks
          Title = text reference.Title }

    /// Converts every citation in the list and hands the result to Helpers.parseOptionList.
    let private createReferences (references: Bio.IO.GenBank.CitationReference list) =
        List.map createReference references |> Helpers.parseOptionList


    /// <summary>
    /// Information on the order in which this entry appears in a series of
    /// discontinuous sequences from the same molecule.
    /// </summary>
    type Segment = { Count: int; Current: int }

    /// Converts a .NET Bio sequence segment into a Segment record; a null input gives None.
    let private createSegment (segment: Bio.IO.GenBank.SequenceSegment) : Segment option =
        if isNull segment then
            None
        else
            Some
                { Count = segment.Count
                  Current = segment.Current }


    ///
    /// Genus, Species and taxonomic classification levels of the sequence.
///
    type OrganismInfo =
        { ClassLevels: string option
          Genus: string option
          Species: string option }

    /// Converts .NET Bio organism information into an OrganismInfo record; a null input gives None.
    let private createOrganismInfo (organismInfo: Bio.IO.GenBank.OrganismInfo) : OrganismInfo option =
        organismInfo
        |> Option.ofObj
        |> Option.map (fun organism ->
            { ClassLevels = organism.ClassLevels |> Helpers.parseOptionString
              Genus = organism.Genus |> Helpers.parseOptionString
              Species = organism.Species |> Helpers.parseOptionString })


    /// <summary>
    /// Common name of the organism or the name most frequently used in the
    /// literature along with the taxonomic classification levels.
    /// </summary>
    type Source =
        { CommonName: string option
          Organism: OrganismInfo option }

    /// Converts a .NET Bio sequence source into a Source record; a null input gives None.
    let private createSource (source: Bio.IO.GenBank.SequenceSource) : Source option =
        source
        |> Option.ofObj
        |> Option.map (fun origin ->
            { CommonName = origin.CommonName |> Helpers.parseOptionString
              Organism = origin.Organism |> createOrganismInfo })


    ///
    /// Compound identifier consisting of the primary accession number and a numeric
    /// version number associated with the current version of the sequence data in
    /// the record. This is followed by an integer key (a "GI") assigned to the
    /// sequence by NCBI.
///
    type Version =
        { Accession: string option
          CompoundAccession: string option
          GiNumber: string option
          Version: string option }

    /// Converts a .NET Bio version entry into a Version record; a null input gives None.
    let private createVersion (version: Bio.IO.GenBank.GenBankVersion) : Version option =
        if isNull version then
            None
        else
            Some
                { Accession = Helpers.parseOptionString version.Accession
                  CompoundAccession = Helpers.parseOptionString version.CompoundAccession
                  GiNumber = Helpers.parseOptionString version.GiNumber
                  Version = Helpers.parseOptionString version.Version }


    /// <summary>
    /// Metadata related to the GenBank Flat File.
    /// </summary>
    type Metadata =
        { Locus: Locus option
          Definition: string option
          Accession: Accession option
          Version: Version option
          DbLinks: DbLink list option
          DbSource: string option
          Keywords: string option
          Primary: string option
          Source: Source option
          References: Reference list option
          Comments: string list option
          Contig: string option
          Segment: Segment option
          Origin: string option }

    /// <summary>
    /// Builds the Metadata record from .NET Bio GenBank metadata, converting each
    /// member with the matching helper defined earlier in this module.
    /// </summary>
    /// <param name="metadata">The .NET Bio metadata to convert.</param>
    let createMetadata (metadata: Bio.IO.GenBank.GenBankMetadata) =
        let text value = Helpers.parseOptionString value

        { Locus = createLocus metadata.Locus
          Definition = text metadata.Definition
          Accession = createAccession metadata.Accession
          Version = createVersion metadata.Version
          DbLinks = metadata.DbLinks |> Seq.toList |> createDbLinks
          DbSource = text metadata.DbSource
          Keywords = text metadata.Keywords
          Primary = text metadata.Primary
          Source = createSource metadata.Source
          References = metadata.References |> Seq.toList |> createReferences
          Comments = metadata.Comments |> Seq.toList |> Helpers.parseOptionList
          Contig = text metadata.Contig
          Segment = createSegment metadata.Segment
          Origin = text metadata.Origin }
--------------------------------------------------------------------------------
/src/RunTime/GenBankSequence.fs:
--------------------------------------------------------------------------------
namespace BioProviders

// --------------------------------------------------------------------------------------
// GenBank Flat File Sequence Representation.
// --------------------------------------------------------------------------------------

module Sequence =

    /// <summary>
    /// Converts a .NET Bio sequence into a BioFSharp sequence by parsing its string form
    /// as nucleotides.
    /// </summary>
    /// <param name="sequence">The .NET Bio sequence to convert.</param>
    let createSequence (sequence: Bio.ISequence) =
        // NOTE(review): relies on ToString() returning every symbol of the sequence -
        // confirm for the .NET Bio version in use.
        sequence.ToString() |> BioFSharp.BioSeq.ofNucleotideString
--------------------------------------------------------------------------------
/src/RunTime/GenBankSpecies.fs:
--------------------------------------------------------------------------------
namespace BioProviders

open BioProviders.Common
open BioProviders.Common.Context

// --------------------------------------------------------------------------------------
// GenBank Species Representation.
// --------------------------------------------------------------------------------------

/// <summary>
/// A species within a database, described by its ID, its name and the path used to
/// look up its assemblies.
/// </summary>
type GenBankSpecies(database: DatabaseName, speciesID: string, speciesName: string, assemblyLookupPath: string) =

    /// The species name supplied to the constructor.
    member this.SpeciesName = speciesName

    /// The species ID supplied to the constructor.
    member this.SpeciesID = speciesID

    /// The assembly lookup path supplied to the constructor.
    member this.AssemblyLookupPath = assemblyLookupPath

    /// Returns one GenBankAssembly for every entry CacheAccess.getAssemblies yields
    /// for this species and the given accession pattern.
    member this.GetAssemblies(accessionPattern: string) =
        let assemblies =
            CacheAccess.getAssemblies database this.AssemblyLookupPath this.SpeciesID accessionPattern

        assemblies
        |> List.map (fun (accession, assemblyName, assemblyPath) ->
            new GenBankAssembly(accession, assemblyName, assemblyPath))

    /// Creates the species from the database and species name held by the context,
    /// using the triple returned by CacheAccess.getSpecies.
    new(context: Context) =
        let database = context.DatabaseName

        let (speciesID, speciesName, assemblyLookupPath) =
            CacheAccess.getSpecies database context.SpeciesName

        GenBankSpecies(database, speciesID, speciesName, assemblyLookupPath)
--------------------------------------------------------------------------------
/src/RunTime/GenBankTaxon.fs:
--------------------------------------------------------------------------------
namespace BioProviders

open BioProviders.Common
open BioProviders.Common.Context

// --------------------------------------------------------------------------------------
// GenBank Taxon Representation.
8 | // -------------------------------------------------------------------------------------- 9 | 10 | type GenBankTaxon(context: Context) = 11 | 12 | member __.GetSpecies(speciesPattern: string) = 13 | let database = context.DatabaseName 14 | 15 | CacheAccess.getSpeciesCollection database speciesPattern 16 | |> List.map (fun speciesInfo -> 17 | let (speciesID, speciesName, assemblyLookupPath) = speciesInfo 18 | new GenBankSpecies(database, speciesID, speciesName, assemblyLookupPath)) 19 | -------------------------------------------------------------------------------- /src/RunTime/RunTime.fs: -------------------------------------------------------------------------------- 1 | namespace BioProviders.RunTime 2 | 3 | open FSharp.Core.CompilerServices 4 | 5 | [] 6 | do () 7 | -------------------------------------------------------------------------------- /src/RunTime/RunTime.fsproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | Library 5 | netstandard2.0 6 | true 7 | BioProviders 8 | typeproviders 9 | typeproviders 10 | 11 | 12 | 13 | 14 | PreserveNewest 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | true 28 | all 29 | 30 | 31 | 32 | 33 | <_PackageFiles Include="$(OutputPath)\BioProviders.DesignTime.dll"> 34 | None 35 | lib\netstandard2.0\ 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /src/RunTime/paket.references: -------------------------------------------------------------------------------- 1 | FSharp.Core 2 | FluentFTP 3 | NetBio.Core 4 | BioFSharp 5 | FSharp.Data -------------------------------------------------------------------------------- /tests/ProviderTests/Program.fs: -------------------------------------------------------------------------------- 1 | module Program = 2 | 3 | [] 4 | let main _ = 0 5 | -------------------------------------------------------------------------------- /tests/ProviderTests/ProviderTests.fs: 
-------------------------------------------------------------------------------- 1 | module ProviderTests 2 | 3 | open NUnit.Framework 4 | 5 | [] 6 | let Setup () = 7 | () 8 | 9 | [] 10 | let Test1 () = 11 | Assert.Pass() 12 | -------------------------------------------------------------------------------- /tests/ProviderTests/ProviderTests.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net6.0 5 | false 6 | false 7 | true 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /tests/ProviderTests/ProviderTests.fsx: -------------------------------------------------------------------------------- 1 | #I "../../bin/Release/lib/netstandard2.0" 2 | #r "BioProviders.dll" 3 | #r "nuget: BioFSharp" 4 | 5 | open BioProviders 6 | open BioFSharp 7 | 8 | // Generate the Assembly Type 9 | let [] Species = "Staphylococcus lugdunensis" 10 | let [] Accession = "gca_001546615.1" 11 | 12 | type AssemblyType = GenBankProvider 13 | 14 | 15 | // Use the Assembly Type 16 | let gbff = AssemblyType.Genome() 17 | 18 | gbff.Sequence |> BioSeq.complement 19 | -------------------------------------------------------------------------------- /tests/ProviderTests/paket.references: -------------------------------------------------------------------------------- 1 | group Test 2 | Microsoft.NET.Test.Sdk 3 | NUnit 4 | NUnit3TestAdapter 5 | FsUnit 6 | FsCheck 7 | GitHubActionsTestLogger -------------------------------------------------------------------------------- /tests/Tests/ContextTests.fs: -------------------------------------------------------------------------------- 1 | namespace BioProviders.Tests 2 | 3 | open NUnit.Framework 4 | open FsUnit 5 | open BioProviders.Common.Context 6 | open BioProviders.Tests.Data 7 | 8 | 9 | // -------------------------------------------------------------------------------------- 10 | // Context Tests. 
// --------------------------------------------------------------------------------------

// NOTE: Assert.Pass() ends an NUnit test immediately, so a success branch that calls it
// hides every check that follows it (later assertions, remaining list elements). Success
// branches therefore return unit, and only Assert.Fail() terminates a test early.

[]
type Context() =

    let mutable contexts = []

    let mutable database = FsCheck.Gen.sample 0 1 (generateDatabase ()) |> Seq.head

    let mutable regexSpeciesString = FsCheck.Gen.sample 0 1 (generateRegexSpeciesString ()) |> Seq.head
    let mutable plainSpeciesString = FsCheck.Gen.sample 0 1 (generatePlainSpeciesString ()) |> Seq.head
    let mutable emptySpeciesString = FsCheck.Gen.sample 0 1 (generateEmptyString ()) |> Seq.head
    let mutable regexSpecies = FsCheck.Gen.sample 0 1 (generateRegexSpecies ()) |> Seq.head
    let mutable plainSpecies = FsCheck.Gen.sample 0 1 (generatePlainSpecies ()) |> Seq.head
    let mutable emptySpecies = FsCheck.Gen.sample 0 1 (generateEmptySpecies ()) |> Seq.head

    let mutable regexAssemblyString = FsCheck.Gen.sample 0 1 (generateRegexAssemblyString ()) |> Seq.head
    let mutable plainAssemblyString = FsCheck.Gen.sample 0 1 (generatePlainAssemblyString ()) |> Seq.head
    let mutable emptyAssemblyString = FsCheck.Gen.sample 0 1 (generateEmptyString ()) |> Seq.head
    let mutable regexAssembly = FsCheck.Gen.sample 0 1 (generateRegexAssembly ()) |> Seq.head
    let mutable plainAssembly = FsCheck.Gen.sample 0 1 (generatePlainAssembly ()) |> Seq.head
    let mutable emptyAssembly = FsCheck.Gen.sample 0 1 (generateEmptyAssembly ()) |> Seq.head

    /// Draws fresh sample data for every field used by the tests below.
    []
    member __.``Setup`` () =
        let size = 0
        let count = 1

        contexts <- FsCheck.Gen.sample size 30 (generateContext ())

        database <- FsCheck.Gen.sample size count (generateDatabase ()) |> Seq.head

        regexSpeciesString <- FsCheck.Gen.sample size count (generateRegexSpeciesString ()) |> Seq.head
        plainSpeciesString <- FsCheck.Gen.sample size count (generatePlainSpeciesString ()) |> Seq.head
        emptySpeciesString <- FsCheck.Gen.sample size count (generateEmptyString ()) |> Seq.head
        regexSpecies <- FsCheck.Gen.sample size count (generateRegexSpecies ()) |> Seq.head
        plainSpecies <- FsCheck.Gen.sample size count (generatePlainSpecies ()) |> Seq.head
        emptySpecies <- FsCheck.Gen.sample size count (generateEmptySpecies ()) |> Seq.head

        regexAssemblyString <- FsCheck.Gen.sample size count (generateRegexAssemblyString ()) |> Seq.head
        plainAssemblyString <- FsCheck.Gen.sample size count (generatePlainAssemblyString ()) |> Seq.head
        emptyAssemblyString <- FsCheck.Gen.sample size count (generateEmptyString ()) |> Seq.head
        regexAssembly <- FsCheck.Gen.sample size count (generateRegexAssembly ()) |> Seq.head
        plainAssembly <- FsCheck.Gen.sample size count (generatePlainAssembly ()) |> Seq.head
        emptyAssembly <- FsCheck.Gen.sample size count (generateEmptyAssembly ()) |> Seq.head

    []
    member __.``Parse - Plain Species name`` () =
        Context.Parse plainSpeciesString plainAssemblyString
        |> (function | (SpeciesPlainName _, _) -> ()
                     | (SpeciesRegexName _, _) -> Assert.Fail())

        Context.Parse plainSpeciesString regexAssemblyString
        |> (function | (SpeciesPlainName _, _) -> ()
                     | (SpeciesRegexName _, _) -> Assert.Fail())

    []
    member __.``Parse - Regex Species name`` () =
        Context.Parse regexSpeciesString plainAssemblyString
        |> (function | (SpeciesPlainName _, _) -> Assert.Fail()
                     | (SpeciesRegexName _, _) -> ())

        Context.Parse regexSpeciesString regexAssemblyString
        |> (function | (SpeciesPlainName _, _) -> Assert.Fail()
                     | (SpeciesRegexName _, _) -> ())

    []
    member __.``Parse - Plain Assembly name`` () =
        Context.Parse plainSpeciesString plainAssemblyString
        |> (function | (_, AccessionPlainName _) -> ()
                     | (_, AccessionRegexName _) -> Assert.Fail())

        Context.Parse regexSpeciesString plainAssemblyString
        |> (function | (_, AccessionPlainName _) -> ()
                     | (_, AccessionRegexName _) -> Assert.Fail())

    []
    member __.``Parse - Regex Assembly name`` () =
        Context.Parse plainSpeciesString regexAssemblyString
        |> (function | (_, AccessionPlainName _) -> Assert.Fail()
                     | (_, AccessionRegexName _) -> ())

        Context.Parse regexSpeciesString regexAssemblyString
        |> (function | (_, AccessionPlainName _) -> Assert.Fail()
                     | (_, AccessionRegexName _) -> ())


    []
    member __.``Parse - Whitespace padding should be removed`` () =

        let paddedPlainSpecies = " \n\r\t\f" + plainSpeciesString + " \n\r\t\f"
        let paddedRegexSpecies = " \n\r\t\f" + regexSpeciesString + " \n\r\t\f"
        let paddedPlainAssembly = " \n\r\t\f" + plainAssemblyString + " \n\r\t\f"
        let paddedRegexAssembly = " \n\r\t\f" + regexAssemblyString + " \n\r\t\f"

        Context.Parse paddedPlainSpecies paddedPlainAssembly
        |> should equal (SpeciesPlainName (plainSpeciesString.ToLower()),
                         AccessionPlainName (plainAssemblyString.ToLower()))

        Context.Parse paddedRegexSpecies paddedPlainAssembly
        |> should equal (SpeciesRegexName (regexSpeciesString.ToLower()),
                         AccessionPlainName (plainAssemblyString.ToLower()))

        Context.Parse paddedPlainSpecies paddedRegexAssembly
        |> should equal (SpeciesPlainName (plainSpeciesString.ToLower()),
                         AccessionRegexName (regexAssemblyString.ToLower()))

        Context.Parse paddedRegexSpecies paddedRegexAssembly
        |> should equal (SpeciesRegexName (regexSpeciesString.ToLower()),
                         AccessionRegexName (regexAssemblyString.ToLower()))

    []
    member __.``Parse - Empty Assembly should be replaced by regex *`` () =
        Context.Parse plainSpeciesString emptyAssemblyString
        |> (function | (_, AccessionRegexName "*") -> ()
                     | _ -> Assert.Fail())

        Context.Parse regexSpeciesString emptyAssemblyString
        |> (function | (_, AccessionRegexName "*") -> ()
                     | _ -> Assert.Fail())

    []
    member __.``Parse - Empty Species should be replaced by regex * (Given Assembly also empty)`` () =
        Context.Parse emptySpeciesString emptyAssemblyString
        |> (function | (SpeciesRegexName "*", AccessionRegexName "*") -> ()
                     | _ -> Assert.Fail())


// --------------------------------------------------------------------------------------
// Name Tests.
// --------------------------------------------------------------------------------------

[]
type Name() =

    let mutable data = []

    /// Draws 30 sample plain names for the tests below.
    []
    member __.``Setup`` () =
        let size = 0
        let count = 30
        data <- FsCheck.Gen.sample size count ( generatePlainName () )

    []
    member __.``Create - Strings ending in * should return Regex Name`` () =
        data
        |> List.map (fun n -> n + "*")
        |> List.map (fun n -> Name.Create n)
        |> Seq.iter (fun n ->
            match n with
            | PlainName _ -> Assert.Fail()
            | RegexName _ -> ())

    []
    member __.``Create - Strings not ending in * should return Plain Name`` () =
        data
        |> List.map (fun n -> Name.Create n)
        |> Seq.iter (fun n ->
            match n with
            | PlainName _ -> ()
            | RegexName _ -> Assert.Fail())

    []
    member __.``Create - Strings containing * anywhere other than end should return Plain Name`` () =
        data
        |> List.map (fun n -> n.Insert(System.Random().Next(n.Length), "*"))
        |> List.map (fun n -> Name.Create n)
        |> Seq.iter (fun n ->
            match n with
            | PlainName _ -> ()
            | RegexName _ -> Assert.Fail())

    []
    member __.``Create - Empty string should return Regex Name of *`` () =
        [""; "\t"; " "; "\n"; " \t \n "]
        |> List.map (fun n -> Name.Create n)
        |> Seq.iter (fun n ->
            match n with
            | PlainName _ -> Assert.Fail()
            | RegexName _ -> ())

    []
    member __.``ToString - If non-empty string used to create Name, should return original string`` () =
        data
        |> List.map (fun n -> Name.Create n)
        |> List.map (fun n -> n.ToString())
        |> should equal data


// --------------------------------------------------------------------------------------
// Database Name Tests.
// --------------------------------------------------------------------------------------

[]
type DatabaseName() =

    // NOTE(review): both test names mention ToString and genomes/refseq or genomes/genbank,
    // but the bodies check GetPath() against the /genomes/all/... paths. Names are kept
    // unchanged so existing test filters keep working.
    []
    member _.``ToString - RefSeq should return genomes/refseq`` () =
        let refseq = RefSeq
        refseq.GetPath() |> should equal "/genomes/all/GCF"

    []
    member _.``ToString - GenBank should return genomes/genbank`` () =
        let genbank = GenBank
        genbank.GetPath() |> should equal "/genomes/all/GCA"


// --------------------------------------------------------------------------------------
// Species Name Tests.
// --------------------------------------------------------------------------------------

// NOTE: Assert.Pass() ends an NUnit test immediately, so calling it inside Seq.iter
// would stop after the first sample. Success branches return unit so that every
// generated sample is checked.

[]
type SpeciesName() =

    let mutable data = []

    /// Draws 30 sample plain species strings for the tests below.
    []
    member __.``Setup`` () =
        let size = 0
        let count = 30
        data <- FsCheck.Gen.sample size count ( generatePlainSpeciesString () )

    []
    member __.``Create - Strings ending in * should return Regex Species Name`` () =
        data
        |> List.map (fun s -> s + "*")
        |> List.map (fun s -> SpeciesName.Create s)
        |> Seq.iter (fun s ->
            match s with
            | SpeciesPlainName _ -> Assert.Fail()
            | SpeciesRegexName _ -> ())

    []
    member __.``Create - Strings not ending in * should return Plain Species Name`` () =
        data
        |> List.map (fun s -> SpeciesName.Create s)
        |> Seq.iter (fun s ->
            match s with
            | SpeciesPlainName _ -> ()
            | SpeciesRegexName _ -> Assert.Fail())

    []
    member __.``Create - Strings containing * anywhere other than end should return Plain Species Name`` () =
        data
        |> List.map (fun s -> s.Insert(System.Random().Next(s.Length), "*"))
        |> List.map (fun s -> SpeciesName.Create s)
        |> Seq.iter (fun s ->
            match s with
            | SpeciesPlainName _ -> ()
            | SpeciesRegexName _ -> Assert.Fail())

    []
    member __.``Create - Empty string should return Regex Species Name of *`` () =
        [""; "\t"; " "; "\n"; " \t \n "]
        |> List.map (fun s -> SpeciesName.Create s)
        |> Seq.iter (fun s ->
            match s with
            | SpeciesPlainName _ -> Assert.Fail()
            | SpeciesRegexName _ -> ())

    []
    member __.``ToString - Should always return string used to create the Species Name`` () =
        data
        |> List.map (fun s -> SpeciesName.Create s)
        |> List.map (fun s -> s.ToString())
        |> should equal data


// 
// --------------------------------------------------------------------------------------
// Assembly Name Tests.
// --------------------------------------------------------------------------------------

/// NUnit fixture for assembly (accession) names: checks which union case
/// `AccessionName.Create` selects for a given input string, and that `ToString`
/// round-trips plain names.
[<TestFixture>]
type AssemblyName() =

    // Sample of plain assembly strings used by the tests; assigned in `Setup`.
    let mutable data = []

    /// Draws 30 plain assembly strings from the shared generator.
    [<SetUp>]
    member __.``Setup`` () =
        let size = 0
        let count = 30
        data <- FsCheck.Gen.sample size count ( generatePlainAssemblyString () )

    // NOTE: the tests below deliberately avoid `Assert.Pass()` inside the iteration.
    // `Assert.Pass` ends the test immediately (it throws NUnit's SuccessException),
    // so calling it per element would stop after the first sample and leave the
    // remaining samples unchecked.

    [<Test>]
    member __.``Create - Strings ending in * should return Regex Assembly Name`` () =
        data
        |> List.map (fun a -> a + "*")
        |> List.iter (fun input ->
            match AccessionName.Create input with
            | AccessionRegexName _ -> ()
            | AccessionPlainName _ -> Assert.Fail(sprintf "Expected a regex assembly name for '%s'." input))

    [<Test>]
    member __.``Create - Strings not ending in * should return Plain Assembly Name`` () =
        data
        |> List.iter (fun input ->
            match AccessionName.Create input with
            | AccessionPlainName _ -> ()
            | AccessionRegexName _ -> Assert.Fail(sprintf "Expected a plain assembly name for '%s'." input))

    [<Test>]
    member __.``Create - Strings containing * anywhere other than end should return Plain Assembly Name`` () =
        // One generator for the whole test instead of a fresh instance per element.
        let rng = System.Random()
        data
        // `Next(a.Length)` is exclusive of `a.Length`, so the star is always inserted
        // before the last character and never ends up at the end of the string.
        |> List.map (fun a -> a.Insert(rng.Next(a.Length), "*"))
        |> List.iter (fun input ->
            match AccessionName.Create input with
            | AccessionPlainName _ -> ()
            | AccessionRegexName _ -> Assert.Fail(sprintf "Expected a plain assembly name for '%s'." input))

    [<Test>]
    member __.``Create - Empty string should return Regex Assembly Name of *`` () =
        [""; "\t"; " "; "\n"; " \t \n "]
        |> List.iter (fun input ->
            match AccessionName.Create input with
            | AccessionRegexName _ -> ()
            | AccessionPlainName _ -> Assert.Fail(sprintf "Expected a regex assembly name for blank input %A." input))

    [<Test>]
    member __.``ToString - Should always return string used to create the Assembly Name`` () =
        data
        |> List.map (fun a -> AccessionName.Create a)
        |> List.map (fun a -> a.ToString())
        |> should equal data

--------------------------------------------------------------------------------
/tests/Tests/Data.fs:
--------------------------------------------------------------------------------
namespace BioProviders.Tests

open BioProviders.Common.Context

/// Fixed sample data and FsCheck generators shared by the test fixtures.
module Data =

    /// Sample archaea species names.
    let possibleArchaeSpecies = [
        "Aigarchaeota_archaeon_SCGC_AAA471-E14"
        "Candidatus_Aenigmarchaeota_archaeon_JGI_0000106-F11"
        "Desulfurococcaceae_archaeon_AG1"
        "Euryarchaeota_archaeum_SCGC_AAA287-N16"
        "Ferroplasma_acidarmanus"
        "Geoglobus_ahangari"
        "Haloferax_sp._ATCC_BAA-646"
        "Infirmifilum_lucidum"
        "Methanospirillum_sp._J.3.6.1-F.2.7.3"
        "Natronolimnohabitans_innermongolicus"
        "Pyrobaculum_sp."
        "Salinarchaeum_sp._Harcht-Bsk1"
        "Thermoplasmatales_archaeon_A-plasma"
        "Vulcanisaeta_moutnovskia"
        "crenarchaeote_SCGC_AAA261-L14"
        "euryarchaeote_SCGC_AAA261-G15"
        "halophilic_archaeon_SHRA6"
        "methanogenic_archaeon_ISO4-H5"
        "uncultured_Acidilobus_sp._OSP8"
    ]

    /// Sample bacteria species names.
    let possibleBacteriaSpecies = [
        "Anaeromassilibacillus_sp._D41t1_190614_C2"
        "Bacillus_sp._B1-WWTP-T-0.5-Post-4"
        "Candidatus_Dependentiae_bacterium_ex_Spumella_elongata_CCAP_955_1_"
        "Diaphorobacter_polyhydroxybutyrativorans"
        "Ectothiorhodospiraceae_bacterium_2226"
        "Fannyhessea_vaginae"
        "Gemmatimonadetes_bacterium_JGI_0000112-M07"
        "Halodesulfovibrio_spirochaetisodalis"
        "Imperialibacter_sp._89"
        "Janthinobacterium_agaricidamnosum"
        "Kluyvera_genomosp._1"
        "Lachnoclostridium_sp._Marseille-P6806"
        "Marinobacter_sp."
        "Nevskia_soli"
        "Oceanicola_sp._MCTG156_1a_"
        "PVC_group_bacterium"
        "Raoultella_sp._18105"
        "Staphylococcus_borealis"
        "Thauera_selenatis"
        "Urbifossiella_limnaea"
        "Variovorax_sp._PAMC26660"
        "WS1_bacterium_JGI_0000059-K21"
        "Xanthobacteraceae_bacterium_501b"
        "Yonghaparkia_sp._Soil809"
        "Zunongwangia_sp._SCSIO_43204"
        "_Sphingomonas_ginsengisoli_Hoang_et_al._2012"
        "aff._Roholtiella_sp._LEGE_12411"
        "bacteria_symbiont_BFo1_of_Frankliniella_occidentalis"
        "cf._Phormidesmis_sp._LEGE_11477"
        "delta_proteobacterium_JGI_0000059-J07"
        "endosymbiont_of_unidentified_scaly_snail_isolate_Monju"
        "filamentous_cyanobacterium_Phorm_46"
        "gamma_proteobacterium_SCGC_AAA076-F14"
        "methane-oxidizing_endosymbiont_of_Gigantopelta_aegis"
        "sulfur-oxidizing_endosymbiont_of_Gigantopelta_aegis"
        "thiotrophic_endosymbiont_of_Bathymodiolus_puteoserpentis_Logatchev_"
        "unidentified_eubacterium_SCB49"
        "zeta_proteobacterium_SCGC_AB-604-B04"
    ]

    /// Sample fungi species names.
    let possibleFungiSpecies = [
        "Acaromyces_ingoldii"
        "Blastomyces_dermatitidis"
        "Cutaneotrichosporon_oleaginosum"
        "Daldinia_childiae"
        "Eutypa_lata"
        "Fusarium_fujikuroi"
        "Guyanagaster_necrorhizus"
        "Histoplasma_mississippiense_nom._inval._"
        "Jaminaea_rosea"
        "Kockovaella_imperatae"
        "Lachancea_lanzarotensis"
        "Mycena_indigotica"
        "Neurospora_tetrasperma"
        "Ogataea_parapolymorpha"
        "Penicillium_arizonense"
        "Rhizophagus_irregularis"
        "Sparassis_crispa"
        "Tetrapisispora_blattae"
        "Ustilaginoidea_virens"
        "Verticillium_alfalfae"
        "Westerdykella_ornata"
        "Xylona_heveae"
        "Yamadazyma_tenuis"
        "Zygosaccharomyces_rouxii"
        "_Candida_haemuloni"
    ]

    /// Sample invertebrate species names.
    let possibleInvertebrateSpecies = [
        "Anoplophora_glabripennis"
        "Bombyx_mandarina"
        "Cryptotermes_secundus"
        "Drosophila_melanogaster"
        "Exaiptasia_diaphana"
        "Frankliniella_occidentalis"
        "Galleria_mellonella"
        "Helicoverpa_armigera"
        "Ixodes_scapularis"
        "Loa_loa"
        "Manduca_sexta"
        "Neodiprion_lecontei"
        "Onthophagus_taurus"
        "Papilio_xuthus"
        "Rhopalosiphum_maidis"
        "Stylophora_pistillata"
        "Trachymyrmex_septentrionalis"
        "Varroa_jacobsoni"
        "Wasmannia_auropunctata"
        "Zootermopsis_nevadensis"
    ]

    /// Sample plant species names.
    let possiblePlantSpecies = [
        "Arachis_hypogaea"
        "Brassica_napus"
        "Cannabis_sativa"
        "Durio_zibethinus"
        "Erythranthe_guttata"
        "Glycine_soja"
        "Hibiscus_syriacus"
        "Ipomoea_triloba"
        "Juglans_microcarpa_x_Juglans_regia"
        "Macadamia_integrifolia"
        "Nicotiana_tabacum"
        "Ostreococcus_sp._lucimarinus_"
        "Prunus_mume"
        "Quercus_lobata"
        "Rhodamnia_argentea"
        "Salvia_splendens"
        "Tripterygium_wilfordii"
        "Volvox_carteri"
        "Zingiber_officinale"
    ]

    /// Sample protozoa species names.
    let possibleProtozoaSpecies = [
        "Acanthamoeba_castellanii"
        "Blastocystis_sp._subtype_4"
        "Cryptosporidium_ubiquitum"
        "Dictyostelium_discoideum"
        "Eimeria_maxima"
        "Gregarina_niphandrodes"
        "Hemiselmis_andersenii"
        "Ichthyophthirius_multifiliis"
        "Leishmania_panamensis"
        "Nannochloropsis_gaditana"
        "Plasmodium_sp._gorilla_clade_G2"
        "Saprolegnia_parasitica"
        "Trypanosoma_rangeli"
    ]

    /// Sample mammalian vertebrate species names.
    let possibleVertebrateMammalianSpecies = [
        "Arvicanthis_niloticus"
        "Bos_indicus_x_Bos_taurus"
        "Camelus_ferus"
        "Dipodomys_ordii"
        "Elephantulus_edwardii"
        "Felis_catus"
        "Grammomys_surdaster"
        "Homo_sapiens"
        "Ictidomys_tridecemlineatus"
        "Jaculus_jaculus"
        "Loxodonta_africana"
        "Molossus_molossus"
        "Nomascus_leucogenys"
        "Ornithorhynchus_anatinus"
        "Prionailurus_bengalensis"
        "Rattus_rattus"
        "Sorex_araneus"
        "Tupaia_chinensis"
        "Ursus_arctos"
        "Vombatus_ursinus"
        "Zalophus_californianus"
    ]

    /// Sample non-mammalian vertebrate species names.
    let possibleVertebrateOtherSpecies = [
        "Antrostomus_carolinensis"
        "Betta_splendens"
        "Centrocercus_urophasianus"
        "Danio_rerio"
        "Electrophorus_electricus"
        "Ficedula_albicollis"
        "Gopherus_evgoodei"
        "Hippocampus_comes"
        "Ictalurus_punctatus"
        "Kryptolebias_marmoratus"
        "Lonchura_striata"
        "Micropterus_salmoides"
        "Nestor_notabilis"
        "Oryzias_latipes"
        "Pseudochaenichthys_georgianus"
        "Rhinatrema_bivittatum"
        "Salvelinus_sp._IW2-2015"
        "Tyto_alba"
        "Varanus_komodoensis"
        "Xiphophorus_hellerii"
        "Zootoca_vivipara"
    ]

    /// Sample viral species names.
    let possibleViralSpecies = [
        "Actinidia_chlorotic_ringspot-associated_emaravirus"
        "Bandicoot_papillomatosis_carcinomatosis_virus_type_1"
        "Carnation_ringspot_virus"
        "Delisea_pulchra_RNA_virus"
        "Eggplant_mottled_dwarf_nucleorhabdovirus"
        "False_black_widow_spider_associated_circular_virus_1"
        "Gannoruwa_bat_lyssavirus"
        "Haloferax_virus_HF1"
        "Indian_citrus_ringspot_virus"
        "Jasmine_virus_T"
        "Kedougou_virus"
        "Lactobacillus_virus_A2"
        "Madariaga_virus"
        "Nanay_virus"
        "Only_Syngen_Nebraska_virus_5"
        "Palyam_virus"
        "Quailpox_virus"
        "Raphanus_sativus_cryptic_virus_1"
        "Sclerotinia_sclerotiorum_debilitation-associated_RNA_virus"
        "Tea_plant_line_pattern_virus"
        "Upsilonpapillomavirus_1"
        "Varroa_mite_associated_genomovirus_1"
        "Watermelon_virus_A"
        "Xapuri_mammarenavirus"
        "Yerba_mate-associated_circular_DNA_virus_1"
        "Zurich_hartmanivirus"
        "crAssphage_cr113_1"
        "uncultured_Caudovirales_phage"
        "unidentified_human_coronavirus"
    ]

    /// Every sample species name, concatenated in the order of the lists above.
    let possibleSpecies =
        List.concat [
            possibleArchaeSpecies
            possibleBacteriaSpecies
            possibleFungiSpecies
            possibleInvertebrateSpecies
            possiblePlantSpecies
            possibleProtozoaSpecies
            possibleVertebrateMammalianSpecies
            possibleVertebrateOtherSpecies
            possibleViralSpecies
        ]

    /// Sample assembly names.
    let possibleAssemblies = [
        "GCF_014905175.1_ASM1490517v1"
        "GCF_000355655.1_DendPond_male_1.0"
        "GCF_002803265.2_SCAv2.0"
        "GCF_002237135.1_ASM223713v2"
        "GCF_002201575.1_ASM220157v1"
        "GCF_000001405.27_GRCh38.p1"
        "GCF_000001405.8_NCBI33"
        "GCF_000956105.1_Pcoq_1.0"
        "GCF_011064425.1_Rrattus_CSIRO_v1"
        "GCF_015852505.1_mTacAcu1.pri"
        "GCF_003431325.1_ASM343132v1"
        "GCF_005543295.1_ASM554329v1"
        "GCF_000730175.1_ASM73017v1"
        "GCF_004698125.1_ASM469812v1"
        "GCF_017874455.1_ASM1787445v1"
        "GCF_013839515.1_ASM1383951v1"
        "GCF_000313525.1_ASM31352v1"
        "GCF_000149585.1_ASM14958v1"
        "GCA_003706315.1_ASM370631v1"
        "GCA_001931935.1_ASM193193v1"
        "GCA_019976455.1_ASM1997645v1"
        "GCA_002006685.1_Batr_sala_BS_V1"
        "GCA_020617725.1_AMFP15.3.pb"
        "GCA_020617735.1_ASM2061773v1"
        // NOTE(review): exact duplicate of the previous entry; kept so the sampling
        // distribution is unchanged. Confirm whether another assembly was intended.
        "GCA_020617735.1_ASM2061773v1"
        "GCA_002914405.1_ByssAF2.0"
        "GCA_016906875.1_ASM1690687v1"
        "GCA_000372705.1_YLSCbra_1.0"
        "GCA_020087005.1_ASM2008700v1"
        "GCA_018360325.1_CSU_LM81_v1"
    ]

    /// Empty and whitespace-only strings.
    let possibleEmptyValues = [
        ""
        " "
        "\t"
        "\n"
        "\r"
        "\f"
        "\t\f\n \t \f\r"
    ]

    /// All sample species names followed by all sample assembly names.
    let possibleNames = List.concat [ possibleSpecies; possibleAssemblies ]


    // ----------------------------------------------------------------------------------
    // Generation Functions.
    // ----------------------------------------------------------------------------------

    /// Generator picking one of the empty/whitespace-only strings.
    let generateEmptyString () =
        FsCheck.Gen.elements possibleEmptyValues

    /// Generator picking any sample species or assembly name.
    let generatePlainName () =
        FsCheck.Gen.elements possibleNames

    /// Generator picking a sample species name (no trailing "*").
    let generatePlainSpeciesString () =
        FsCheck.Gen.elements possibleSpecies

    /// Generator picking a sample assembly name (no trailing "*").
    let generatePlainAssemblyString () =
        FsCheck.Gen.elements possibleAssemblies

    /// Generator picking a sample species name with "*" appended.
    let generateRegexSpeciesString () =
        possibleSpecies
        |> List.map (fun species -> species + "*")
        |> FsCheck.Gen.elements

    /// Generator picking a sample assembly name with "*" appended.
    let generateRegexAssemblyString () =
        possibleAssemblies
        |> List.map (fun assembly -> assembly + "*")
        |> FsCheck.Gen.elements

    /// Generator picking either database, RefSeq or GenBank.
    let generateDatabase () =
        [ RefSeq; GenBank ]
        |> FsCheck.Gen.elements

    /// Generator of species names created from plain sample strings.
    let generatePlainSpecies () =
        (generatePlainSpeciesString ())
        |> FsCheck.Gen.map (fun species -> SpeciesName.Create species)

    /// Generator of species names created from "*"-suffixed sample strings.
    let generateRegexSpecies () =
        (generateRegexSpeciesString ())
        |> FsCheck.Gen.map (fun species -> SpeciesName.Create species)

    /// Generator of species names created from empty/whitespace-only strings.
    let generateEmptySpecies () =
        (generateEmptyString ())
        |> FsCheck.Gen.map (fun species -> SpeciesName.Create species)

    /// Generator of accession names created from plain sample strings.
    let generatePlainAssembly () =
        (generatePlainAssemblyString ())
        |> FsCheck.Gen.map (fun assembly -> AccessionName.Create assembly)

    /// Generator of accession names created from "*"-suffixed sample strings.
    let generateRegexAssembly () =
        (generateRegexAssemblyString ())
        |> FsCheck.Gen.map (fun assembly -> AccessionName.Create assembly)

    /// Generator of accession names created from empty/whitespace-only strings.
    let generateEmptyAssembly () =
        (generateEmptyString ())
        |> FsCheck.Gen.map (fun assembly -> AccessionName.Create assembly)

    /// Generator of contexts built from a database, a species and an assembly generator.
    /// Which of the species/assembly fields use the regex ("*"-suffixed) variant is
    /// decided once, when this function is called, not per generated value.
    let generateContext () =
        // Applicative helpers: `f <!> g` maps `f` over generator `g`, and `<*>` applies a
        // generator of functions to a generator of arguments.
        let (<!>) = FsCheck.Gen.map
        let (<*>) = FsCheck.Gen.apply

        // Pick between 0 and 3 field indices (upper bound of `Next` is exclusive) from a
        // shuffled [0; 1; 2]; index 1 selects the species, index 2 the assembly.
        // NOTE(review): index 0 is never consulted below, so the database is unaffected.
        let numRegex = System.Random().Next(0, 4)
        let regexFields =
            [0; 1; 2]
            |> FsCheck.Gen.shuffle
            |> FsCheck.Gen.sample 0 1
            |> Seq.head
            |> Seq.take numRegex
            |> Seq.toList

        let database = generateDatabase ()
        let species = if (List.contains 1 regexFields) then generateRegexSpecies () else generatePlainSpecies ()
        let assembly = if (List.contains 2 regexFields) then generateRegexAssembly () else generatePlainAssembly ()

        Context.Create
        <!> database
        <*> species
        <*> assembly
--------------------------------------------------------------------------------
/tests/Tests/Program.fs:
--------------------------------------------------------------------------------
module Program =

    // Entry point of the test executable; it does nothing and returns exit code 0.
    [<EntryPoint>]
    let main _ = 0
--------------------------------------------------------------------------------
/tests/Tests/Tests.fsproj:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 | net6.0 5 | false 6 | false 7 | true 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 |
--------------------------------------------------------------------------------
/tests/Tests/paket.references:
--------------------------------------------------------------------------------
1 | group Test 2 | Microsoft.NET.Test.Sdk 3 | NUnit 4 | NUnit3TestAdapter 5 | FsUnit 6 | FsCheck 7 | GitHubActionsTestLogger
--------------------------------------------------------------------------------