├── wiki ├── logo.png ├── flyio_exp.png └── flyio-manual.pdf ├── .Rbuildignore ├── .gitignore ├── man ├── flyio_list_dir.Rd ├── flyio_remove_dir.Rd ├── flyio_get_dir.Rd ├── flyio_set_datasource.Rd ├── flyio_set_dir.Rd ├── flyio_get_datasource.Rd ├── flyio_set_bucket.Rd ├── list_bucket.Rd ├── flyio_get_bucket.Rd ├── file_exists.Rd ├── export_file.Rd ├── flyio_auth.Rd ├── export_rda.Rd ├── import_rds.Rd ├── export_st.Rd ├── import_st.Rd ├── export_rds.Rd ├── export_raster.Rd ├── import_rda.Rd ├── import_raster.Rd ├── export_folder.Rd ├── import_stack.Rd ├── import_file.Rd ├── export_table.Rd ├── export_shp.Rd ├── import_table.Rd ├── list_files.Rd └── import_shp.Rd ├── flyio.Rproj ├── DESCRIPTION ├── NAMESPACE ├── inst └── extdata │ └── mtcars.csv ├── R ├── list_bucket.R ├── file_exists.R ├── flyio_datasource.R ├── import_st.R ├── export_st.R ├── import_rds.R ├── export_rda.R ├── import_stack.R ├── export_raster.R ├── export_rds.R ├── import_rda.R ├── import_raster.R ├── export_table.R ├── import_table.R ├── flyio_bucket.R ├── flyio_dir.R ├── export_file.R ├── import_shp.R ├── export_shp.R ├── import_file.R ├── list_files.R ├── export_folder.R └── flyio_auth.R └── README.md /wiki/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atlanhq/flyio/HEAD/wiki/logo.png -------------------------------------------------------------------------------- /wiki/flyio_exp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atlanhq/flyio/HEAD/wiki/flyio_exp.png -------------------------------------------------------------------------------- /wiki/flyio-manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atlanhq/flyio/HEAD/wiki/flyio-manual.pdf -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.github$ 4 | ^\.git$ 5 | ^wiki$ 6 | ^\.httr-oauth$ 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rproj 3 | .Rhistory 4 | .RData 5 | .Ruserdata 6 | .DS_Store 7 | .httr-oauth 8 | tests/tests.R 9 | -------------------------------------------------------------------------------- /man/flyio_list_dir.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/flyio_dir.R 3 | \name{flyio_list_dir} 4 | \alias{flyio_list_dir} 5 | \title{List files in flyio tmp folder} 6 | \usage{ 7 | flyio_list_dir() 8 | } 9 | \value{ 10 | the string - file names 11 | } 12 | \description{ 13 | Get the list of files downloaded by flyio in the default tmp folder 14 | } 15 | \examples{ 16 | flyio_list_dir() 17 | } 18 | -------------------------------------------------------------------------------- /man/flyio_remove_dir.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/flyio_dir.R 3 | \name{flyio_remove_dir} 4 | \alias{flyio_remove_dir} 5 | \title{Delete files in flyio tmp folder} 6 | \usage{ 7 | flyio_remove_dir() 8 | } 9 | \value{ 10 | files deleted 11 | } 12 | \description{ 13 | Delete 
the list of files downloaded by flyio in the default tmp folder 14 | } 15 | \examples{ 16 | flyio_remove_dir() 17 | } 18 | -------------------------------------------------------------------------------- /flyio.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /man/flyio_get_dir.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/flyio_dir.R 3 | \name{flyio_get_dir} 4 | \alias{flyio_get_dir} 5 | \title{Get global directory for flyio} 6 | \usage{ 7 | flyio_get_dir() 8 | } 9 | \value{ 10 | the string - directory name 11 | } 12 | \description{ 13 | Get global directory where flyio functions will download intermediate files 14 | } 15 | \details{ 16 | if the directory is not set using flyio_set_dir(), it will return paste0(tempdir(), "/flyio") 17 | } 18 | \examples{ 19 | flyio_get_dir() 20 | } 21 | -------------------------------------------------------------------------------- /man/flyio_set_datasource.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/flyio_datasource.R 3 | \name{flyio_set_datasource} 4 | \alias{flyio_set_datasource} 5 | \title{Set global data source name for flyio} 6 | \usage{ 7 | flyio_set_datasource(data_source) 8 | } 9 | \arguments{ 10 | \item{data_source}{the data source name to be set} 11 | } 12 | \value{ 13 | stores the data source name in the CLOUD_STORAGE_NAME environment variable 14 | } 15 | \description{ 16 | Set global data source name to be used for all the functions in flyio 17 | } 18 | \examples{ 19 | flyio_set_datasource("local") 20 | } 21 | -------------------------------------------------------------------------------- /man/flyio_set_dir.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/flyio_dir.R 3 | \name{flyio_set_dir} 4 | \alias{flyio_set_dir} 5 | \title{Set global directory for flyio to store data} 6 | \usage{ 7 | flyio_set_dir(dir = paste0(tempdir(), "/flyio", Sys.getpid())) 8 | } 9 | \arguments{ 10 | \item{dir}{the directory to store intermediate files} 11 | } 12 | \value{ 13 | stores the directory in a global environment under CLOUD_DIR 14 | } 15 | \description{ 16 | Set global directory where flyio functions will download intermediate files 17 | } 18 | \examples{ 19 | flyio_set_dir(dir = tempdir()) 20 | } 21 | -------------------------------------------------------------------------------- /man/flyio_get_datasource.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/flyio_datasource.R 3 | \name{flyio_get_datasource} 4 | \alias{flyio_get_datasource} 5 | \title{Get global data source name for flyio} 6 | \usage{ 7 | 
flyio_get_datasource() 8 | } 9 | \value{ 10 | the string - data source name stored 11 | } 12 | \description{ 13 | Get global data source name to be used for all the functions in flyio. Returns the value stored using flyio_set_datasource 14 | } 15 | \examples{ 16 | # first setting the data source 17 | flyio_set_datasource("s3") 18 | # getting the data source 19 | flyio_get_datasource() 20 | } 21 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: flyio 2 | Type: Package 3 | Title: Read or Write any Format from Anywhere 4 | Version: 0.1.4.2 5 | Author: Himanshu Sikaria [aut, cre] 6 | Maintainer: Himanshu Sikaria 7 | Description: Perform input, output of files in R from data sources like Google Cloud Storage ('GCS') , Amazon Web Services ('AWS S3') or local drive. 8 | URL: https://github.com/atlanhq/flyio 9 | BugReports: https://github.com/atlanhq/flyio/issues 10 | License: GPL-3 11 | Encoding: UTF-8 12 | LazyData: true 13 | RoxygenNote: 6.1.1 14 | Depends: R (>= 3.1.2) 15 | Imports: 16 | stringr, 17 | assertthat, 18 | googleCloudStorageR, 19 | utils, 20 | tools 21 | Remotes: cloudyr/aws.s3 22 | -------------------------------------------------------------------------------- /man/flyio_set_bucket.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/flyio_bucket.R 3 | \name{flyio_set_bucket} 4 | \alias{flyio_set_bucket} 5 | \title{Set global bucket name for flyio} 6 | \usage{ 7 | flyio_set_bucket(bucket, data_source = flyio_get_datasource()) 8 | } 9 | \arguments{ 10 | \item{bucket}{the bucket name to be set} 11 | 12 | \item{data_source}{the data source used for I/O. Default chooses the data source set using flyio_set_datasource()} 13 | } 14 | \value{ 15 | stores the bucket name in a global environment under flyioBucketGcs or flyioBucketS3 16 | } 17 | \description{ 18 | Set global bucket name to be used for all the functions in flyio 19 | } 20 | \examples{ 21 | flyio_set_bucket(bucket = "your-bucket-name", data_source = "S3") 22 | } 23 | -------------------------------------------------------------------------------- /man/list_bucket.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/list_bucket.R 3 | \name{list_bucket} 4 | \alias{list_bucket} 5 | \title{List buckets for cloud storage} 6 | \usage{ 7 | list_bucket(data_source = flyio_get_datasource(), gcs_project = "") 8 | } 9 | \arguments{ 10 | \item{data_source}{default to local. Possible options : gcs, s3, local. 
Case insensitive} 11 | 12 | \item{gcs_project}{Project containing buckets to list in Google Cloud Storage} 13 | } 14 | \value{ 15 | vector of bucket names 16 | } 17 | \description{ 18 | List buckets for cloud storage 19 | } 20 | \examples{ 21 | # No buckets if data source is local 22 | list_bucket(data_source = "local") 23 | \dontrun{ 24 | flyio_set_datasource("s3") 25 | flyio_auth() # authentication needed for S3 26 | list_bucket() 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /man/flyio_get_bucket.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/flyio_bucket.R 3 | \name{flyio_get_bucket} 4 | \alias{flyio_get_bucket} 5 | \title{Get global bucket name for flyio} 6 | \usage{ 7 | flyio_get_bucket(data_source = flyio_get_datasource()) 8 | } 9 | \arguments{ 10 | \item{data_source}{the data source used for I/O. Default chooses the data source set using flyio_set_datasource()} 11 | } 12 | \value{ 13 | the string - bucket name stored 14 | } 15 | \description{ 16 | Get global bucket name to be used for all the functions in flyio 17 | } 18 | \details{ 19 | if the data source is local, then an empty string is returned 20 | } 21 | \examples{ 22 | # first setting the bucket for a data source 23 | flyio_set_bucket(bucket = "socialcops-test", data_source = "S3") 24 | # retrieving the bucket for S3 25 | flyio_get_bucket(data_source = "S3") 26 | } 27 | -------------------------------------------------------------------------------- /man/file_exists.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/file_exists.R 3 | \name{file_exists} 4 | \alias{file_exists} 5 | \title{Check if a file exists} 6 | \usage{ 7 | file_exists(path, data_source = flyio_get_datasource(), 8 | bucket = flyio_get_bucket(data_source)) 9 | } 10 | \arguments{ 11 | \item{path}{the entire path for the file} 12 | 13 | \item{data_source}{the name of the data source, if not set globally. s3, gsc or local} 14 | 15 | \item{bucket}{the name of the bucket, if not set globally} 16 | } 17 | \value{ 18 | logical. 
if the file exists or not 19 | } 20 | \description{ 21 | Check if a file exists 22 | } 23 | \examples{ 24 | # Check with data source local 25 | file_exists(path = "tests/mtcars.csv", data_source = "local") 26 | \dontrun{ 27 | # Check with data source GCS 28 | flyio_set_datasource("gcs") 29 | flyio_set_bucket("your-bucket-name") 30 | file_exists(path = "tests/mtcars.csv") 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export("export_file") 4 | export("export_folder") 5 | export("export_raster") 6 | export("export_rda") 7 | export("export_rds") 8 | export("export_shp") 9 | export("export_st") 10 | export("export_table") 11 | export("file_exists") 12 | export("flyio_auth") 13 | export("flyio_get_bucket") 14 | export("flyio_get_datasource") 15 | export("flyio_get_dir") 16 | export("flyio_list_dir") 17 | export("flyio_remove_dir") 18 | export("flyio_set_bucket") 19 | export("flyio_set_datasource") 20 | export("flyio_set_dir") 21 | export("import_file") 22 | export("import_raster") 23 | export("import_rda") 24 | export("import_rds") 25 | export("import_shp") 26 | export("import_st") 27 | export("import_stack") 28 | export("import_table") 29 | export("list_bucket") 30 | export("list_files") 31 | import("assertthat") 32 | import("aws.s3") 33 | import("googleCloudStorageR") 34 | import("stringr") 35 | import("tools") 36 | import("utils") 37 | -------------------------------------------------------------------------------- /man/export_file.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/export_file.R 3 | \name{export_file} 4 | \alias{export_file} 5 | \title{Upload a file from the local system to cloud} 6 | \usage{ 7 | export_file(localfile, bucketpath, data_source = flyio_get_datasource(), 8 | bucket = flyio_get_bucket(data_source), show_progress = FALSE, ...) 9 | } 10 | \arguments{ 11 | \item{localfile}{path of the file to be uploaded} 12 | 13 | \item{bucketpath}{path where the file needs to be uploaded, the file name can or cannot be present} 14 | 15 | \item{data_source}{the name of the data source, if not set globally. gcs or s3} 16 | 17 | \item{bucket}{the name of the bucket, if not set globally} 18 | 19 | \item{show_progress}{logical. 
Shows progress of the upload operation.} 20 | 21 | \item{...}{other parameters for gcs_upload or aws.s3::put_object} 22 | } 23 | \value{ 24 | the filename and path of the file in the bucket 25 | } 26 | \description{ 27 | Write a local file to the cloud, S3 or GCS 28 | } 29 | \examples{ 30 | \dontrun{ 31 | flyio_set_datasource("gcs") 32 | flyio_set_bucket("your-bucket-name") 33 | export_file("file-local.csv", "file-on-cloud.csv") 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /man/flyio_auth.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/flyio_auth.R 3 | \name{flyio_auth} 4 | \alias{flyio_auth} 5 | \title{Authenticate flyio} 6 | \usage{ 7 | flyio_auth(auth_list = "", data_source = flyio_get_datasource(), 8 | scope = "https://www.googleapis.com/auth/devstorage.full_control", 9 | awsprofile = "default") 10 | } 11 | \arguments{ 12 | \item{auth_list}{path to the json file or the system environment name in case of gcs. For s3 a vector for access_key, secret_access_key, region (optional; default us-east-1) and session_id (optional); this could also be a single comma-separated string. If left blank, then for s3 it will pick from ~/.aws/credentials file} 13 | 14 | \item{data_source}{default to local. Possible options : gcs, s3, local. Case insensitive} 15 | 16 | \item{scope}{the scope of the auth if gcs. Default: https://www.googleapis.com/auth/devstorage.full_control} 17 | 18 | \item{awsprofile}{if auth_list = "", which profile to use from ~/.aws/credentials} 19 | } 20 | \description{ 21 | Authenticate any of the cloud storage platforms to perform any I/O 22 | } 23 | \examples{ 24 | flyio_set_datasource("local") 25 | flyio_auth() 26 | 27 | } 28 | -------------------------------------------------------------------------------- /man/export_rda.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/export_rda.R 3 | \name{export_rda} 4 | \alias{export_rda} 5 | \title{Write RDA files} 6 | \usage{ 7 | export_rda(..., file, FUN = save, data_source = flyio_get_datasource(), 8 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), 9 | delete_file = TRUE, show_progress = FALSE) 10 | } 11 | \arguments{ 12 | \item{...}{R ojects need to be saved} 13 | 14 | \item{file}{path of the file to be written to} 15 | 16 | \item{FUN}{the function using which the file is to write} 17 | 18 | \item{data_source}{the name of the data source, if not set globally. s3, gcs or local} 19 | 20 | \item{bucket}{the name of the bucket, if not set globally} 21 | 22 | \item{dir}{the directory to store intermediate files} 23 | 24 | \item{delete_file}{logical. to delete the file to be uploaded} 25 | 26 | \item{show_progress}{logical. 
Shows progress of the upload operation.} 27 | } 28 | \value{ 29 | No output 30 | } 31 | \description{ 32 | Write R data RDA file to anywhere from R 33 | } 34 | \examples{ 35 | \dontrun{ 36 | # save RDA on Google Cloud Storage 37 | flyio_set_datasource("gcs") 38 | flyio_set_bucket("your-bucket-name") 39 | export_rda(iris, mtcars, "rda-on-cloud.rda", dir = tempdir()) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /man/import_rds.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/import_rds.R 3 | \name{import_rds} 4 | \alias{import_rds} 5 | \title{Read RDS file} 6 | \usage{ 7 | import_rds(file, FUN = readRDS, data_source = flyio_get_datasource(), 8 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), 9 | delete_file = TRUE, show_progress = FALSE, ...) 10 | } 11 | \arguments{ 12 | \item{file}{path of the file to be read} 13 | 14 | \item{FUN}{the function using which the file is to be read} 15 | 16 | \item{data_source}{the name of the data source, if not set globally. s3, gcs or local} 17 | 18 | \item{bucket}{the name of the bucket, if not set globally} 19 | 20 | \item{dir}{the directory to store intermediate files} 21 | 22 | \item{delete_file}{logical. to delete the file downloaded} 23 | 24 | \item{show_progress}{logical. Shows progress of the download operation} 25 | 26 | \item{...}{other parameters for the FUN function defined above} 27 | } 28 | \value{ 29 | the output of the FUN function 30 | } 31 | \description{ 32 | Read R data - RDS file from anywhere 33 | } 34 | \examples{ 35 | \dontrun{ 36 | # Import RDS from Google Cloud 37 | flyio_set_datasource("gcs") 38 | flyio_set_bucket("your-bucket-name") 39 | import_rds("rds-on-cloud.rds", readRDS, dir = tempdir()) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /man/export_st.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/export_st.R 3 | \name{export_st} 4 | \alias{export_st} 5 | \title{Write geojson and geopkgs} 6 | \usage{ 7 | export_st(x, file, FUN = sf::write_sf, 8 | data_source = flyio_get_datasource(), 9 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), 10 | delete_file = TRUE, show_progress = FALSE, ...) 11 | } 12 | \arguments{ 13 | \item{x}{variable name} 14 | 15 | \item{file}{path of the file to be written to} 16 | 17 | \item{FUN}{the function using which the file is to write} 18 | 19 | \item{data_source}{the name of the data source, if not set globally. s3, gcs or local} 20 | 21 | \item{bucket}{the name of the bucket, if not set globally} 22 | 23 | \item{dir}{the directory to store intermediate files} 24 | 25 | \item{delete_file}{logical. to delete the file to be uploaded} 26 | 27 | \item{show_progress}{logical. 
Shows the progress of the upload operation} 28 | 29 | \item{...}{other parameters for the FUN function defined above} 30 | } 31 | \value{ 32 | No output 33 | } 34 | \description{ 35 | Write geojson and geopkgs 36 | } 37 | \examples{ 38 | \dontrun{ 39 | # for data on cloud 40 | flyio_set_datasource("gcs") 41 | flyio_set_bucket("your-bucket-name") 42 | export_table(iris, "iris.geojson", dir = tempdir()) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /man/import_st.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/import_st.R 3 | \name{import_st} 4 | \alias{import_st} 5 | \title{Read geojson, geopkg} 6 | \usage{ 7 | import_st(file, FUN = sf::read_sf, 8 | data_source = flyio_get_datasource(), 9 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), 10 | delete_file = TRUE, show_progress = FALSE, ...) 11 | } 12 | \arguments{ 13 | \item{file}{path of the file to be read} 14 | 15 | \item{FUN}{the function using which the file is to be read} 16 | 17 | \item{data_source}{the name of the data source, if not set globally. s3, gcs or local} 18 | 19 | \item{bucket}{the name of the bucket, if not set globally} 20 | 21 | \item{dir}{the directory to store intermediate files} 22 | 23 | \item{delete_file}{logical. to delete the file downloaded} 24 | 25 | \item{show_progress}{logical. Shows the progress of the download operation} 26 | 27 | \item{...}{other parameters for the FUN function defined above} 28 | } 29 | \value{ 30 | the output of the FUN function 31 | } 32 | \description{ 33 | Read geospatial data from anywhere using a function defined by you 34 | } 35 | \examples{ 36 | \dontrun{ 37 | # for data on cloud 38 | flyio_set_datasource("gcs") 39 | flyio_set_bucket("your-bucket-name") 40 | data = import_table("excel-file-on-gcs.geojson", dir = tempdir()) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /man/export_rds.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/export_rds.R 3 | \name{export_rds} 4 | \alias{export_rds} 5 | \title{Write RDS files} 6 | \usage{ 7 | export_rds(x, file, FUN = saveRDS, 8 | data_source = flyio_get_datasource(), 9 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), 10 | delete_file = TRUE, show_progress = FALSE, ...) 11 | } 12 | \arguments{ 13 | \item{x}{variable name} 14 | 15 | \item{file}{path of the file to be written to} 16 | 17 | \item{FUN}{the function using which the file is to write} 18 | 19 | \item{data_source}{the name of the data source, if not set globally. s3, gcs or local} 20 | 21 | \item{bucket}{the name of the bucket, if not set globally} 22 | 23 | \item{dir}{the directory to store intermediate files} 24 | 25 | \item{delete_file}{logical. to delete the file to be uploaded} 26 | 27 | \item{show_progress}{logical. 
Shows progress of the upload operation.} 28 | 29 | \item{...}{other parameters for the FUN function defined above} 30 | } 31 | \value{ 32 | if FUN returns anything 33 | } 34 | \description{ 35 | Write R data RDS file to anywhere from R 36 | } 37 | \examples{ 38 | \dontrun{ 39 | # save RDS on Google Cloud 40 | flyio_set_datasource("gcs") 41 | flyio_set_bucket("your-bucket-name") 42 | export_rds(iris, "iris-on-cloud.rds", saveRDS, dir = tempdir()) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /man/export_raster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/export_raster.R 3 | \name{export_raster} 4 | \alias{export_raster} 5 | \title{Write raster} 6 | \usage{ 7 | export_raster(x, file, FUN = raster::writeRaster, 8 | data_source = flyio_get_datasource(), 9 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), 10 | delete_file = TRUE, show_progress = FALSE, ...) 11 | } 12 | \arguments{ 13 | \item{x}{variable name} 14 | 15 | \item{file}{path of the file to be written to} 16 | 17 | \item{FUN}{the function using which the file is to write} 18 | 19 | \item{data_source}{the name of the data source, if not set globally. s3, gcs or local} 20 | 21 | \item{bucket}{the name of the bucket, if not set globally} 22 | 23 | \item{dir}{the directory to store intermediate files} 24 | 25 | \item{delete_file}{logical. to delete the file to be uploaded} 26 | 27 | \item{show_progress}{logical. Shows progress of the upload operation.} 28 | 29 | \item{...}{other parameters for the FUN function defined above} 30 | } 31 | \value{ 32 | No output 33 | } 34 | \description{ 35 | Write raster 36 | } 37 | \examples{ 38 | \dontrun{ 39 | flyio_set_datasource("gcs") 40 | flyio_set_bucket("your-bucket-name") 41 | r1 <- raster(nrows=108, ncols=21, xmn=0, xmx=10) 42 | export_raster(r1, "raster-cloud.tif", writeRaster, format = "GTiff", dir = tempdir()) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /man/import_rda.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/import_rda.R 3 | \name{import_rda} 4 | \alias{import_rda} 5 | \title{Read RDA file} 6 | \usage{ 7 | import_rda(file, FUN = load, data_source = flyio_get_datasource(), 8 | bucket = flyio_get_bucket(data_source), envir = globalenv(), 9 | dir = flyio_get_dir(), delete_file = TRUE, show_progress = FALSE, 10 | ...) 11 | } 12 | \arguments{ 13 | \item{file}{path of the file to be read} 14 | 15 | \item{FUN}{the function using which the file is to be read} 16 | 17 | \item{data_source}{the name of the data source, if not set globally. s3, gcs or local} 18 | 19 | \item{bucket}{the name of the bucket, if not set globally} 20 | 21 | \item{envir}{the environment in which to import the objects} 22 | 23 | \item{dir}{the directory to store intermediate files} 24 | 25 | \item{delete_file}{logical. to delete the file downloaded} 26 | 27 | \item{show_progress}{logical. 
Shows progress of the download operation} 28 | 29 | \item{...}{other parameters for the FUN function defined above} 30 | } 31 | \value{ 32 | the output of the FUN function 33 | } 34 | \description{ 35 | Read RData or rda file from anywhere 36 | } 37 | \examples{ 38 | \dontrun{ 39 | # Load RDA from Google Cloud 40 | flyio_set_datasource("gcs") 41 | flyio_set_bucket("your-bucket-name") 42 | import_rda("rds-on-cloud.rda", dir = tempdir()) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /man/import_raster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/import_raster.R 3 | \name{import_raster} 4 | \alias{import_raster} 5 | \title{Read raster files} 6 | \usage{ 7 | import_raster(file, FUN = raster::raster, 8 | data_source = flyio_get_datasource(), 9 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), 10 | delete_file = FALSE, show_progress = FALSE, ...) 11 | } 12 | \arguments{ 13 | \item{file}{path of the file to be read} 14 | 15 | \item{FUN}{the function using which the file is to be read} 16 | 17 | \item{data_source}{the name of the data source, if not set globally. s3, gcs or local} 18 | 19 | \item{bucket}{the name of the bucket, if not set globally} 20 | 21 | \item{dir}{the directory to store intermediate files} 22 | 23 | \item{delete_file}{logical. to delete the file downloaded} 24 | 25 | \item{show_progress}{logical. Shows progress of the download operation} 26 | 27 | \item{...}{other parameters for the FUN function defined above} 28 | } 29 | \value{ 30 | the output of the FUN function 31 | } 32 | \description{ 33 | Read raster data from anywhere using a function defined by you 34 | } 35 | \examples{ 36 | \dontrun{ 37 | # when data source is cloud 38 | flyio_set_datasource("gcs") 39 | flyio_set_bucket("your-bucket-name") 40 | library(raster) 41 | t = import_raster("your-raster.tif", FUN = raster, dir = tempdir()) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /man/export_folder.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/export_folder.R 3 | \name{export_folder} 4 | \alias{export_folder} 5 | \title{Upload a folder from the local system to cloud} 6 | \usage{ 7 | export_folder(localfolder, bucketpath, pattern = "*", overwrite = TRUE, 8 | data_source = flyio_get_datasource(), 9 | bucket = flyio_get_bucket(data_source), show_progress = FALSE, ...) 10 | } 11 | \arguments{ 12 | \item{localfolder}{path of the folder in which all the files are to be uploaded} 13 | 14 | \item{bucketpath}{path of the folder in which the files are to be uploaded} 15 | 16 | \item{pattern}{pattern of the file names in the folder to be uploaded} 17 | 18 | \item{overwrite}{if files need to be overwritten (if already present)} 19 | 20 | \item{data_source}{the name of the data source, if not set globally. can be gcs or s3} 21 | 22 | \item{bucket}{the name of the bucket, if not set globally} 23 | 24 | \item{show_progress}{logical. 
Shows progress of the upload operation.} 25 | 26 | \item{...}{other parameters for gcs/s3 upload} 27 | } 28 | \value{ 29 | the filename and path of the file in the bucket 30 | } 31 | \description{ 32 | Write a local folder to the cloud, S3 or GCS 33 | } 34 | \examples{ 35 | \dontrun{ 36 | flyio_set_datasource("gcs") 37 | flyio_set_bucket("your-bucket-name") 38 | export_folder("folder-local/", "folder-on-cloud/") 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /man/import_stack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/import_stack.R 3 | \name{import_stack} 4 | \alias{import_stack} 5 | \title{Read stack from GCS/S3 or local} 6 | \usage{ 7 | import_stack(pathstack, FUN = raster::stack, 8 | data_source = flyio_get_datasource(), 9 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), 10 | delete_file = FALSE, show_progress = FALSE, ...) 11 | } 12 | \arguments{ 13 | \item{pathstack}{vector of paths of rasters (layers)} 14 | 15 | \item{FUN}{the function using which the file is to be read} 16 | 17 | \item{data_source}{the name of the data source, if not set globally. s3, gcs or local} 18 | 19 | \item{bucket}{the name of the bucket, if not set globally} 20 | 21 | \item{dir}{the directory to store intermediate files} 22 | 23 | \item{delete_file}{logical. to delete the file downloaded} 24 | 25 | \item{show_progress}{logical. Shows progress of the download operation} 26 | 27 | \item{...}{other parameters for the FUN function defined above} 28 | } 29 | \value{ 30 | the output of the FUN function 31 | } 32 | \description{ 33 | Read Stack/Brick data from anywhere using a function defined by you 34 | } 35 | \examples{ 36 | \dontrun{ 37 | # Import stack from Google Cloud 38 | flyio_set_datasource("gcs") 39 | flyio_set_bucket("your-bucket-name") 40 | t = import_stack("tests/raster-cloud/", dir = tempdir()) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /man/import_file.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/import_file.R 3 | \name{import_file} 4 | \alias{import_file} 5 | \title{Download file from cloud to local system} 6 | \usage{ 7 | import_file(bucketpath, localfile, data_source = flyio_get_datasource(), 8 | bucket = flyio_get_bucket(data_source), overwrite = TRUE, 9 | show_progress = FALSE, ...) 10 | } 11 | \arguments{ 12 | \item{bucketpath}{path of file in the bucket} 13 | 14 | \item{localfile}{path where the file needs to be downloaded. The file name and extension also need to be present; if not, the current file name will be considered} 15 | 16 | \item{data_source}{the name of the data source, if not set globally, gcs or s3} 17 | 18 | \item{bucket}{the name of the bucket, if not set globally} 19 | 20 | \item{overwrite}{logical. If the files should be overwritten if already present} 21 | 22 | \item{show_progress}{logical. 
Shows progress of the download operation} 23 | 24 | \item{...}{other parameters for gcs_get_object or save_object} 25 | } 26 | \value{ 27 | the filename and path of the object saved to local 28 | } 29 | \description{ 30 | Save a single file from the cloud to your local drive 31 | } 32 | \examples{ 33 | \dontrun{ 34 | # import data from GCS to Local 35 | flyio_set_datasource("gcs") 36 | flyio_set_bucket("your-bucket-name") 37 | import_file("mtcars.csv", paste0(tempdir(), "/mtcars.csv"), overwrite = T) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /inst/extdata/mtcars.csv: -------------------------------------------------------------------------------- 1 | "mpg","cyl","disp","hp","drat","wt","qsec","vs","am","gear","carb" 2 | 21,6,160,110,3.9,2.62,16.46,0,1,4,4 3 | 21,6,160,110,3.9,2.875,17.02,0,1,4,4 4 | 22.8,4,108,93,3.85,2.32,18.61,1,1,4,1 5 | 21.4,6,258,110,3.08,3.215,19.44,1,0,3,1 6 | 18.7,8,360,175,3.15,3.44,17.02,0,0,3,2 7 | 18.1,6,225,105,2.76,3.46,20.22,1,0,3,1 8 | 14.3,8,360,245,3.21,3.57,15.84,0,0,3,4 9 | 24.4,4,146.7,62,3.69,3.19,20,1,0,4,2 10 | 22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2 11 | 19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4 12 | 17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4 13 | 16.4,8,275.8,180,3.07,4.07,17.4,0,0,3,3 14 | 17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3 15 | 15.2,8,275.8,180,3.07,3.78,18,0,0,3,3 16 | 10.4,8,472,205,2.93,5.25,17.98,0,0,3,4 17 | 10.4,8,460,215,3,5.424,17.82,0,0,3,4 18 | 14.7,8,440,230,3.23,5.345,17.42,0,0,3,4 19 | 32.4,4,78.7,66,4.08,2.2,19.47,1,1,4,1 20 | 30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2 21 | 33.9,4,71.1,65,4.22,1.835,19.9,1,1,4,1 22 | 21.5,4,120.1,97,3.7,2.465,20.01,1,0,3,1 23 | 15.5,8,318,150,2.76,3.52,16.87,0,0,3,2 24 | 15.2,8,304,150,3.15,3.435,17.3,0,0,3,2 25 | 13.3,8,350,245,3.73,3.84,15.41,0,0,3,4 26 | 19.2,8,400,175,3.08,3.845,17.05,0,0,3,2 27 | 27.3,4,79,66,4.08,1.935,18.9,1,1,4,1 28 | 26,4,120.3,91,4.43,2.14,16.7,0,1,5,2 29 | 30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2 30 | 15.8,8,351,264,4.22,3.17,14.5,0,1,5,4 31 | 19.7,6,145,175,3.62,2.77,15.5,0,1,5,6 32 | 15,8,301,335,3.54,3.57,14.6,0,1,5,8 33 | 21.4,4,121,109,4.11,2.78,18.6,1,1,4,2 34 | -------------------------------------------------------------------------------- /man/export_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/export_table.R 3 | \name{export_table} 4 | \alias{export_table} 5 | \title{Write csv, Excel files, txt} 6 | \usage{ 7 | export_table(x, file, FUN = data.table::fwrite, 8 | data_source = flyio_get_datasource(), 9 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), 10 | delete_file = TRUE, show_progress = FALSE, ...) 11 | } 12 | \arguments{ 13 | \item{x}{variable name} 14 | 15 | \item{file}{path of the file to be written to} 16 | 17 | \item{FUN}{the function using which the file is to write} 18 | 19 | \item{data_source}{the name of the data source, if not set globally. s3, gcs or local} 20 | 21 | \item{bucket}{the name of the bucket, if not set globally} 22 | 23 | \item{dir}{the directory to store intermediate files} 24 | 25 | \item{delete_file}{logical. to delete the file to be uploaded} 26 | 27 | \item{show_progress}{logical. 
Shows progress of the upload operation.} 28 | 29 | \item{...}{other parameters for the FUN function defined above} 30 | } 31 | \value{ 32 | No output 33 | } 34 | \description{ 35 | Write csv, Excel files, txt 36 | } 37 | \examples{ 38 | # for data on local 39 | export_table(iris, paste0(tempdir(), "/iris.csv"), FUN = write.csv, data_source = "local") 40 | \dontrun{ 41 | # for data on cloud 42 | flyio_set_datasource("gcs") 43 | flyio_set_bucket("your-bucket-name") 44 | export_table(iris, "iris.csv", write.csv, dir = tempdir()) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /man/export_shp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/export_shp.R 3 | \name{export_shp} 4 | \alias{export_shp} 5 | \title{Write shapefiles} 6 | \usage{ 7 | export_shp(obj, pathshp, FUN = rgdal::writeOGR, dsnlayerbind = F, 8 | data_source = flyio_get_datasource(), 9 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), 10 | delete_file = TRUE, show_progress = FALSE, ...) 11 | } 12 | \arguments{ 13 | \item{obj}{R object to be written} 14 | 15 | \item{pathshp}{the path of the shapefile, which may or may not include the extension} 16 | 17 | \item{FUN}{the function using which the file is to be read} 18 | 19 | \item{dsnlayerbind}{if the FUN needs dsn and layer binded or not} 20 | 21 | \item{data_source}{the name of the data source, if not set globally. s3, gcs or local} 22 | 23 | \item{bucket}{the name of the bucket, if not set globally} 24 | 25 | \item{dir}{the directory to store intermediate files} 26 | 27 | \item{delete_file}{logical. to delete the file to be uploaded} 28 | 29 | \item{show_progress}{logical. Shows progress of the upload operation.} 30 | 31 | \item{...}{other parameters for the FUN function defined above} 32 | } 33 | \value{ 34 | output of the FUN function if any 35 | } 36 | \description{ 37 | Write shapefiles 38 | } 39 | \examples{ 40 | \dontrun{ 41 | # Save shapefile on Google Cloud 42 | flyio_set_datasource("gcs") 43 | flyio_set_bucket("your-bucket-name") 44 | export_shp(your-shp, "your-shp.shp", driver = "ESRI Shapefile", overwrite = T, dir = tempdir()) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /man/import_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/import_table.R 3 | \name{import_table} 4 | \alias{import_table} 5 | \title{Read csv, Excel files, txt} 6 | \usage{ 7 | import_table(file, FUN = data.table::fread, 8 | data_source = flyio_get_datasource(), 9 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), 10 | delete_file = TRUE, show_progress = FALSE, ...) 11 | } 12 | \arguments{ 13 | \item{file}{path of the file to be read} 14 | 15 | \item{FUN}{the function using which the file is to be read} 16 | 17 | \item{data_source}{the name of the data source, if not set globally. s3, gcs or local} 18 | 19 | \item{bucket}{the name of the bucket, if not set globally} 20 | 21 | \item{dir}{the directory to store intermediate files} 22 | 23 | \item{delete_file}{logical. to delete the file downloaded} 24 | 25 | \item{show_progress}{logical. 
Shows progress of the download operation} 26 | 27 | \item{...}{other parameters for the FUN function defined above} 28 | } 29 | \value{ 30 | the output of the FUN function 31 | } 32 | \description{ 33 | Read tabular data from anywhere using a function defined by you 34 | } 35 | \examples{ 36 | # for data on local 37 | filepath = system.file("extdata", "mtcars.csv", package = "flyio") 38 | data = import_table(filepath, FUN = read.csv, data_source = "local") 39 | \dontrun{ 40 | # for data on cloud 41 | flyio_set_datasource("gcs") 42 | flyio_set_bucket("your-bucket-name") 43 | data = import_table("excel-file-on-gcs.xlsx", read_excel, dir = tempdir()) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /man/list_files.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/list_files.R 3 | \name{list_files} 4 | \alias{list_files} 5 | \title{List the Files in a Directory/Folder} 6 | \usage{ 7 | list_files(path = "", pattern = NULL, recursive = FALSE, 8 | ignore.case = FALSE, full.names = TRUE, 9 | data_source = flyio_get_datasource(), 10 | bucket = flyio_get_bucket(data_source), check_region = FALSE) 11 | } 12 | \arguments{ 13 | \item{path}{the folder for which the files need to be listed} 14 | 15 | \item{pattern}{an optional regular expression. Only file path names that match the regular expression will be returned.} 16 | 17 | \item{recursive}{logical. Should the listing recurse into directories?} 18 | 19 | \item{ignore.case}{logical. Should pattern-matching be case-insensitive?} 20 | 21 | \item{full.names}{logical. Should the entire path be returned or only after the path inputed?} 22 | 23 | \item{data_source}{the name of the data source, gcs, s3 or local; if not set globally} 24 | 25 | \item{bucket}{the name of the bucket, if not set globally} 26 | 27 | \item{check_region}{logical. to check region for aws.s3} 28 | } 29 | \value{ 30 | a vector of full file names 31 | } 32 | \description{ 33 | list the files in cloud or locally - similar to list.files() 34 | } 35 | \examples{ 36 | # List files locally 37 | list_files(path = tempdir(), data_source = "local") 38 | \dontrun{ 39 | # List files on S3 40 | flyio_set_datasource("s3") 41 | flyio_set_bucket("your-bucket-name") 42 | list_files(path = "tests/", pattern = ".*csv") 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /R/list_bucket.R: -------------------------------------------------------------------------------- 1 | #' List buckets for cloud storage 2 | #' 3 | #' @param data_source default to local. Possible options : gcs, s3, local. 
Case insensitive 4 | #' @param gcs_project Project containing buckets to list in Google Cloud Storage 5 | #' 6 | #' @return vector of bucket names 7 | #' @import "googleCloudStorageR" "aws.s3" "assertthat" 8 | #' @export "list_bucket" 9 | #' 10 | #' @examples 11 | #' # No buckets if data source is local 12 | #' list_bucket(data_source = "local") 13 | #' \dontrun{ 14 | #' flyio_set_datasource("s3") 15 | #' flyio_auth() # authentication needed for S3 16 | #' list_bucket() 17 | #' } 18 | list_bucket <- function(data_source = flyio_get_datasource(), gcs_project = ""){ 19 | 20 | # checking if data_source input is valid 21 | invisible(assertthat::assert_that(stringr::str_to_lower(data_source) %in% c("local", "gcs", "s3"), 22 | msg = "data_source should be either local, gcs or s3")) 23 | 24 | # if data source is local return 25 | if(str_to_lower(data_source) == "local"){ 26 | message("data_source is set to Local. No buckets.") 27 | return(invisible("")) 28 | } 29 | 30 | # running authentication for set data source 31 | if(str_to_lower(data_source) == "gcs"){ 32 | invisible(assertthat::assert_that(gcs_project != "", 33 | msg = "Please specift the project containing buckets to list")) 34 | buckets = gcs_list_buckets(projectId = gcs_project) 35 | } else if(str_to_lower(data_source) == "s3"){ 36 | buckets = bucketlist() 37 | } 38 | return(buckets) 39 | } 40 | -------------------------------------------------------------------------------- /man/import_shp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/import_shp.R 3 | \name{import_shp} 4 | \alias{import_shp} 5 | \title{Read shapefiles} 6 | \usage{ 7 | import_shp(pathshp, FUN = rgdal::readOGR, dsnlayerbind = F, 8 | data_source = flyio_get_datasource(), 9 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), 10 | delete_file = TRUE, show_progress = FALSE, ...) 11 | } 12 | \arguments{ 13 | \item{pathshp}{path of the shp file to be read} 14 | 15 | \item{FUN}{the function using which the file is to be read} 16 | 17 | \item{dsnlayerbind}{if the FUN needs dsn and layer binded or not} 18 | 19 | \item{data_source}{the name of the data source, if not set globally. s3, gcs or local} 20 | 21 | \item{bucket}{the name of the bucket, if not set globally} 22 | 23 | \item{dir}{the directory to store intermediate files} 24 | 25 | \item{delete_file}{logical. to delete the file downloaded} 26 | 27 | \item{show_progress}{logical. Shows progress of the download operation} 28 | 29 | \item{...}{other parameters for the FUN function defined above} 30 | } 31 | \value{ 32 | the output of the FUN function 33 | } 34 | \description{ 35 | Read shapefiles data from anywhere using a function defined by you 36 | } 37 | \examples{ 38 | \dontrun{ 39 | # import shapefile from Google Cloud 40 | flyio_set_datasource("gcs") 41 | flyio_set_bucket("your-bucket-name") 42 | t = import_shp("shptest-on-cloud.shp", FUN = readOGR, dsnlayerbind = F, dir = tempdir()) 43 | t = import_shp("shptest-on-cloud.shp", FUN = raster::shapefile, dsnlayerbind = T, dir = tempdir()) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /R/file_exists.R: -------------------------------------------------------------------------------- 1 | #' Check if a file exists 2 | #' 3 | #' @param path the entire path for the file 4 | #' @param data_source the name of the data source, if not set globally. 
s3, gcs or local 5 | #' @param bucket the name of the bucket, if not set globally 6 | #' 7 | #' @export "file_exists" 8 | #' @return logical. if the file exists or not 9 | #' 10 | #' @examples 11 | #' # Check with data source local 12 | #' file_exists(path = "tests/mtcars.csv", data_source = "local") 13 | #' \dontrun{ 14 | #' # Check with data source GCS 15 | #' flyio_set_datasource("gcs") 16 | #' flyio_set_bucket("your-bucket-name") 17 | #' file_exists(path = "tests/mtcars.csv") 18 | #' } 19 | 20 | file_exists <- function(path, data_source = flyio_get_datasource(), bucket = flyio_get_bucket(data_source)){ 21 | # getting the vector of all the files 22 | assert_that(is.character(path), is.character(bucket)) 23 | assert_that(str_to_lower(data_source) %in% c("gcs", "s3", "local"), msg = "Enter a valid data source") 24 | data_source = str_to_lower(data_source) 25 | if(data_source == "local"){ 26 | return(file.exists(path)) 27 | } 28 | tryCatch({ 29 | # dir_path = dirname(path) 30 | # dir_path = gsub("\\/+","/",dir_path) 31 | # dir_path = gsub("^\\/|^\\.\\/|^\\.","",dir_path) 32 | obj = list_files(path = path, recursive = T, 33 | data_source = data_source, bucket = bucket) 34 | }, error = function(err){ 35 | print(err) 36 | return(FALSE) 37 | }) 38 | 39 | # checking if the path lies in the above vector 40 | path = gsub("\\/+","/",path) 41 | if(path %in% obj){ 42 | return(TRUE) 43 | } else{ 44 | return(FALSE) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /R/flyio_datasource.R: -------------------------------------------------------------------------------- 1 | #' Set global data source name for flyio 2 | #' @description Set global data source name to be used for all the functions in flyio 3 | #' @param data_source the data source name to be set 4 | #' 5 | #' @return stores the data source name in the CLOUD_STORAGE_NAME environment variable 6 | #' @export "flyio_set_datasource" 7 | #' @import "stringr" 8 | #' @examples flyio_set_datasource("local") 9 | flyio_set_datasource <- function(data_source){ 10 | invisible(assert_that(is.string(data_source) && str_to_lower(data_source) %in% c("gcs", "s3", "local"), msg = "Enter a valid data source name")) 11 | data_source = str_to_lower(data_source) 12 | Sys.setenv("CLOUD_STORAGE_NAME" = data_source) 13 | message("Default Data Source name set to '",data_source,"'") 14 | } 15 | 16 | #' Get global data source name for flyio 17 | #' @description Get global data source name to be used for all the functions in flyio. Returns the value stored using flyio_set_datasource 18 | #' @return the string - data source name stored 19 | #' @export "flyio_get_datasource" 20 | #' 21 | #' @examples 22 | #' # first setting the data source 23 | #' flyio_set_datasource("s3") 24 | #' # getting the data source 25 | #' flyio_get_datasource() 26 | flyio_get_datasource <- function(){ 27 | data_source = Sys.getenv("CLOUD_STORAGE_NAME") 28 | if(data_source == ""){ 29 | data_source = Sys.getenv("flyioDataSource") 30 | if(data_source != ""){ 31 | message("flyioDataSource env name is deprecated. Please use CLOUD_STORAGE_NAME.") 32 | } 33 | } 34 | invisible(assert_that(is.string(data_source) && data_source != "", msg = "No data source set. 
Use flyio_set_datasource to set the data source.")) 35 | return(data_source) 36 | } 37 | -------------------------------------------------------------------------------- /R/import_st.R: -------------------------------------------------------------------------------- 1 | #' Read geojson, geopkg 2 | #' @description Read geospatial data from anywhere using a function defined by you 3 | #' 4 | #' @param file path of the file to be read 5 | #' @param FUN the function using which the file is to be read 6 | #' @param data_source the name of the data source, if not set globally. s3, gcs or local 7 | #' @param bucket the name of the bucket, if not set globally 8 | #' @param dir the directory to store intermediate files 9 | #' @param delete_file logical. to delete the file downloaded 10 | #' @param show_progress logical. Shows the progress of the download operation 11 | #' @param ... other parameters for the FUN function defined above 12 | #' 13 | #' @export "import_st" 14 | #' @return the output of the FUN function 15 | #' 16 | #' @examples 17 | #' \dontrun{ 18 | #' # for data on cloud 19 | #' flyio_set_datasource("gcs") 20 | #' flyio_set_bucket("your-bucket-name") 21 | #' data = import_st("geojson-file-on-gcs.geojson", dir = tempdir()) 22 | #' } 23 | 24 | import_st <- function(file, FUN = sf::read_sf, data_source = flyio_get_datasource(), 25 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), delete_file = TRUE, show_progress = FALSE, ...){ 26 | # checking if the file is valid 27 | if(data_source == "local"){ 28 | t = FUN(file, ...) 29 | return(t) 30 | } 31 | # a tempfile with the required extension 32 | temp <- paste0(dir, "/", basename(file)) 33 | if(isTRUE(delete_file)){on.exit(unlink(temp))} 34 | # downloading the file 35 | file = gsub("\\/+","/",file) 36 | downlogical = import_file(bucketpath = file, localfile = temp, data_source = data_source, bucket = bucket, show_progress = show_progress) 37 | assert_that(is.character(downlogical), msg = "Downloading of file failed") 38 | # loading the file to the memory using user defined function 39 | result = FUN(temp, ...) 40 | return(result) 41 | } 42 | 43 | -------------------------------------------------------------------------------- /R/export_st.R: -------------------------------------------------------------------------------- 1 | #' Write geojson and geopkgs 2 | #' 3 | #' @param x variable name 4 | #' @param file path of the file to be written to 5 | #' @param FUN the function using which the file is to write 6 | #' @param data_source the name of the data source, if not set globally. s3, gcs or local 7 | #' @param bucket the name of the bucket, if not set globally 8 | #' @param dir the directory to store intermediate files 9 | #' @param delete_file logical. to delete the file to be uploaded 10 | #' @param show_progress logical. Shows the progress of the upload operation 11 | #' @param ... 
other parameters for the FUN function defined above 12 | 13 | #' 14 | #' @return No output 15 | #' @export "export_st" 16 | #' @examples 17 | #' \dontrun{ 18 | #' # for data on cloud 19 | #' flyio_set_datasource("gcs") 20 | #' flyio_set_bucket("your-bucket-name") 21 | #' export_table(iris, "iris.geojson", dir = tempdir()) 22 | #' } 23 | 24 | export_st <- function(x, file, FUN = sf::write_sf, data_source = flyio_get_datasource(), 25 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), delete_file = TRUE, show_progress = FALSE, ...){ 26 | 27 | # checking if the file is valid 28 | #assert_that(tools::file_ext(file) %in% c("csv", "xlsx", "xls", "txt"), msg = "Please input a valid path") 29 | if(data_source == "local"){ 30 | t = FUN(x, file, ...) 31 | return(invisible(t)) 32 | } 33 | # a tempfile with the required extension 34 | if(isTRUE(delete_file)){ 35 | temp <- paste0(dir, "/", basename(file)) 36 | on.exit(unlink(temp)) 37 | } else { 38 | temp <- paste0(dir, "/", basename(file)) 39 | } 40 | # loading the file to the memory using user defined function 41 | file = gsub("\\/+","/",file) 42 | FUN(x, temp, ...) 43 | # downloading the file 44 | export_file(localfile = temp, bucketpath = file, data_source = data_source, bucket = bucket, show_progress = show_progress) 45 | 46 | } 47 | 48 | -------------------------------------------------------------------------------- /R/import_rds.R: -------------------------------------------------------------------------------- 1 | #' Read RDS file 2 | #' @description Read R data - RDS file from anywhere 3 | #' @param file path of the file to be read 4 | #' @param FUN the function using which the file is to be read 5 | #' @param data_source the name of the data source, if not set globally. s3, gcs or local 6 | #' @param bucket the name of the bucket, if not set globally 7 | #' @param dir the directory to store intermediate files 8 | #' @param delete_file logical. to delete the file downloaded 9 | #' @param show_progress logical. Shows progress of the download operation 10 | #' @param ... other parameters for the FUN function defined above 11 | #' @export "import_rds" 12 | #' @return the output of the FUN function 13 | #' 14 | #' @examples 15 | #' \dontrun{ 16 | #' # Import RDS from Google Cloud 17 | #' flyio_set_datasource("gcs") 18 | #' flyio_set_bucket("your-bucket-name") 19 | #' import_rds("rds-on-cloud.rds", readRDS, dir = tempdir()) 20 | #' } 21 | 22 | import_rds <- function(file, FUN = readRDS, data_source = flyio_get_datasource(), 23 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), delete_file = TRUE, show_progress = FALSE, ...){ 24 | 25 | # checking if the file is valid 26 | assert_that(tools::file_ext(file) %in% c("RDS", "rds"), msg = "Please input a valid path") 27 | if(data_source == "local"){ 28 | t = FUN(file, ...) 29 | return(t) 30 | } 31 | # a tempfile with the required extension 32 | temp <- paste0(dir, "/", basename(file)) 33 | if(isTRUE(delete_file)){on.exit(unlink(temp))} 34 | # downloading the file 35 | file = gsub("\\/+","/",file) 36 | downlogical = import_file(bucketpath = file, localfile = temp, bucket = bucket, show_progress = show_progress) 37 | assert_that(is.character(downlogical), msg = "Downloading of file failed") 38 | # loading the file to the memory using user defined function 39 | result = FUN(temp, ...) 
40 | return(result) 41 | } 42 | 43 | -------------------------------------------------------------------------------- /R/export_rda.R: -------------------------------------------------------------------------------- 1 | #' Write RDA files 2 | #' @description Write R data RDA file to anywhere from R 3 | #' @param ... R objects that need to be saved 4 | #' @param file path of the file to be written to 5 | #' @param FUN the function using which the file is to write 6 | #' @param bucket the name of the bucket, if not set globally 7 | #' @param data_source the name of the data source, if not set globally. s3, gcs or local 8 | #' @param dir the directory to store intermediate files 9 | #' @param delete_file logical. to delete the file to be uploaded 10 | #' @param show_progress logical. Shows progress of the upload operation. 11 | #' 12 | #' @return No output 13 | #' @export "export_rda" 14 | #' @examples 15 | #' \dontrun{ 16 | #' # save RDA on Google Cloud Storage 17 | #' flyio_set_datasource("gcs") 18 | #' flyio_set_bucket("your-bucket-name") 19 | #' export_rda(iris, mtcars, file = "rda-on-cloud.rda", dir = tempdir()) 20 | #' } 21 | 22 | export_rda <- function(..., file, FUN = save, data_source = flyio_get_datasource(), 23 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), delete_file = TRUE, show_progress = FALSE){ 24 | # checking if the file is valid 25 | assert_that(tools::file_ext(file) %in% c("rda", "Rda","RData"), msg = "Please input a valid path") 26 | if(data_source == "local"){ 27 | t = FUN(..., file = file) 28 | return(invisible(t)) 29 | } 30 | # a tempfile with the required extension 31 | if(isTRUE(delete_file)){ 32 | temp <- tempfile(fileext = paste0(".",tools::file_ext(file)), tmpdir = dir) 33 | on.exit(unlink(temp)) 34 | } else { 35 | temp <- paste0(dir, "/", basename(file)) 36 | } 37 | # writing the objects to a local file using the user defined function 38 | file = gsub("\\/+","/",file) 39 | FUN(..., file = temp) 40 | # uploading the file 41 | export_file(localfile = temp, bucketpath = file, data_source = data_source, bucket = bucket, show_progress = show_progress) 42 | 43 | } 44 | 45 | -------------------------------------------------------------------------------- /R/import_stack.R: -------------------------------------------------------------------------------- 1 | #' Read stack from GCS/S3 or local 2 | #' @description Read Stack/Brick data from anywhere using a function defined by you 3 | #' @param pathstack vector of paths of rasters (layers) 4 | #' @param FUN the function using which the file is to be read 5 | #' @param data_source the name of the data source, if not set globally. s3, gcs or local 6 | #' @param bucket the name of the bucket, if not set globally 7 | #' @param dir the directory to store intermediate files 8 | #' @param delete_file logical. to delete the file downloaded 9 | #' @param show_progress logical. Shows progress of the download operation 10 | #' @param ... 
other parameters for the FUN function defined above 11 | #' @export "import_stack" 12 | #' @return the output of the FUN function 13 | #' 14 | #' @examples 15 | #' \dontrun{ 16 | #' # Import stack from Google Cloud 17 | #' flyio_set_datasource("gcs") 18 | #' flyio_set_bucket("your-bucket-name") 19 | #' t = import_stack("tests/raster-cloud/", dir = tempdir()) 20 | #' } 21 | 22 | import_stack <- function(pathstack, FUN = raster::stack, data_source = flyio_get_datasource(), 23 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), delete_file = FALSE, show_progress = FALSE, ...){ 24 | 25 | 26 | if(data_source == "local"){ 27 | result = FUN(pathstack, ...) 28 | return(result) 29 | } 30 | # downloading the file 31 | for(i in pathstack){ 32 | # a tempfile with the required extension 33 | temp <- paste0(dir, "/", basename(i)) 34 | if(isTRUE(delete_file)){on.exit(unlink(temp))} 35 | # downloading the file 36 | downlogical = import_file(bucketpath = i, localfile = temp, 37 | data_source = data_source, bucket = bucket, overwrite = T, show_progress = show_progress) 38 | } 39 | # loading the file to the memory using user defined function 40 | result = FUN(paste0(dir, "/",basename(pathstack)), ...) 41 | return(result) 42 | } 43 | 44 | -------------------------------------------------------------------------------- /R/export_raster.R: -------------------------------------------------------------------------------- 1 | #' Write raster 2 | #' @param x variable name 3 | #' @param file path of the file to be written to 4 | #' @param FUN the function using which the file is to write 5 | #' @param data_source the name of the data source, if not set globally. s3, gcs or local 6 | #' @param bucket the name of the bucket, if not set globally 7 | #' @param dir the directory to store intermediate files 8 | #' @param delete_file logical. to delete the file to be uploaded 9 | #' @param show_progress logical. Shows progress of the upload operation. 10 | #' @param ... other parameters for the FUN function defined above 11 | #' @export "export_raster" 12 | #' @return No output 13 | #' 14 | #' @examples 15 | #' \dontrun{ 16 | #' flyio_set_datasource("gcs") 17 | #' flyio_set_bucket("your-bucket-name") 18 | #' r1 <- raster(nrows=108, ncols=21, xmn=0, xmx=10) 19 | #' export_raster(r1, "raster-cloud.tif", writeRaster, format = "GTiff", dir = tempdir()) 20 | #' } 21 | 22 | export_raster <- function(x, file, FUN = raster::writeRaster, data_source = flyio_get_datasource(), 23 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), delete_file = TRUE, show_progress = FALSE, ...){ 24 | # checking if the file is valid 25 | #assert_that(tools::file_ext(file) %in% c("tif", "hdf"), msg = "Please input a valid path") 26 | if(data_source == "local"){ 27 | t = FUN(x, file, ...) 28 | return(invisible(t)) 29 | } 30 | # a tempfile with the required extension 31 | if(isTRUE(delete_file)){ 32 | temp <- paste0(dir, "/", basename(file)) 33 | on.exit(unlink(temp)) 34 | } else { 35 | temp <- paste0(dir, "/", basename(file)) 36 | } 37 | 38 | # loading the file to the memory using user defined function 39 | file = gsub("\\/+","/",file) 40 | FUN(x, temp, ...) 
41 | # downloading the file 42 | export_file(localfile = temp, bucketpath = file, data_source = data_source, bucket = bucket, show_progress = show_progress) 43 | } 44 | 45 | -------------------------------------------------------------------------------- /R/export_rds.R: -------------------------------------------------------------------------------- 1 | #' Write RDS files 2 | #' @description Write R data RDS file to anywhere from R 3 | #' @param x variable name 4 | #' @param file path of the file to be written to 5 | #' @param FUN the function using which the file is to write 6 | #' @param bucket the name of the bucket, if not set globally 7 | #' @param data_source the name of the data source, if not set globally. s3, gcs or local 8 | #' @param dir the directory to store intermediate files 9 | #' @param delete_file logical. to delete the file to be uploaded 10 | #' @param show_progress logical. Shows progress of the upload operation. 11 | #' @param ... other parameters for the FUN function defined above 12 | #' 13 | #' @return if FUN returns anything 14 | #' @export "export_rds" 15 | #' @examples 16 | #' \dontrun{ 17 | #' # save RDS on Google Cloud 18 | #' flyio_set_datasource("gcs") 19 | #' flyio_set_bucket("your-bucket-name") 20 | #' export_rds(iris, "iris-on-cloud.rds", saveRDS, dir = tempdir()) 21 | #' } 22 | 23 | export_rds <- function(x, file, FUN = saveRDS, data_source = flyio_get_datasource(), 24 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), delete_file = TRUE, show_progress = FALSE, ...){ 25 | # checking if the file is valid 26 | assert_that(tools::file_ext(file) %in% c("RDS", "rds"), msg = "Please input a valid path") 27 | if(data_source == "local"){ 28 | t = FUN(x, file, ...) 29 | return(invisible(t)) 30 | } 31 | # a tempfile with the required extension 32 | if(isTRUE(delete_file)){ 33 | temp <- tempfile(fileext = paste0(".",tools::file_ext(file)), tmpdir = dir) 34 | on.exit(unlink(temp)) 35 | } else { 36 | temp <- paste0(dir, "/", basename(file)) 37 | } 38 | # loading the file to the memory using user defined function 39 | file = gsub("\\/+","/",file) 40 | FUN(x, temp, ...) 41 | # downloading the file 42 | export_file(localfile = temp, bucketpath = file, data_source = data_source, bucket = bucket, show_progress = show_progress) 43 | 44 | } 45 | 46 | -------------------------------------------------------------------------------- /R/import_rda.R: -------------------------------------------------------------------------------- 1 | #' Read RDA file 2 | #' @description Read RData or rda file from anywhere 3 | #' @param file path of the file to be read 4 | #' @param FUN the function using which the file is to be read 5 | #' @param data_source the name of the data source, if not set globally. s3, gcs or local 6 | #' @param bucket the name of the bucket, if not set globally 7 | #' @param envir the environment in which to import the objects 8 | #' @param dir the directory to store intermediate files 9 | #' @param delete_file logical. to delete the file downloaded 10 | #' @param show_progress logical. Shows progress of the download operation 11 | #' @param ... 
other parameters for the FUN function defined above 12 | #' @export "import_rda" 13 | #' @return the output of the FUN function 14 | #' 15 | #' @examples 16 | #' \dontrun{ 17 | #' # Load RDA from Google Cloud 18 | #' flyio_set_datasource("gcs") 19 | #' flyio_set_bucket("your-bucket-name") 20 | #' import_rda("rds-on-cloud.rda", dir = tempdir()) 21 | #' } 22 | 23 | import_rda <- function(file, FUN = load, data_source = flyio_get_datasource(), 24 | bucket = flyio_get_bucket(data_source), envir = globalenv(), dir = flyio_get_dir(), delete_file = TRUE, show_progress = FALSE, ...){ 25 | 26 | # checking if the file is valid 27 | assert_that(tools::file_ext(file) %in% c("rda", "Rda", "RData"), msg = "Please input a valid path") 28 | if(data_source == "local"){ 29 | t = FUN(file, envir = envir, ...) 30 | return(invisible(t)) 31 | } 32 | # a tempfile with the required extension 33 | temp <- paste0(dir, "/", basename(file)) 34 | if(isTRUE(delete_file)){on.exit(unlink(temp))} 35 | # downloading the file 36 | file = gsub("\\/+","/",file) 37 | downlogical = import_file(bucketpath = file, localfile = temp, bucket = bucket, show_progress = show_progress) 38 | assert_that(is.character(downlogical), msg = "Downloading of file failed") 39 | # loading the file into memory using the user-defined function 40 | FUN(temp, envir = envir, ...) 41 | return(invisible()) 42 | } 43 | 44 | -------------------------------------------------------------------------------- /R/import_raster.R: -------------------------------------------------------------------------------- 1 | #' Read raster files 2 | #' 3 | #' @description Read raster data from anywhere using a function defined by you 4 | #' @param file path of the file to be read 5 | #' @param FUN the function using which the file is to be read 6 | #' @param data_source the name of the data source, if not set globally. s3, gcs or local 7 | #' @param bucket the name of the bucket, if not set globally 8 | #' @param dir the directory to store intermediate files 9 | #' @param delete_file logical. to delete the file downloaded 10 | #' @param show_progress logical. Shows progress of the download operation 11 | #' @param ... other parameters for the FUN function defined above 12 | #' 13 | #' @export "import_raster" 14 | #' @return the output of the FUN function 15 | #' 16 | #' @examples 17 | #' \dontrun{ 18 | #' # when the data source is cloud 19 | #' flyio_set_datasource("gcs") 20 | #' flyio_set_bucket("your-bucket-name") 21 | #' library(raster) 22 | #' t = import_raster("your-raster.tif", FUN = raster, dir = tempdir()) 23 | #' } 24 | 25 | import_raster <- function(file, FUN = raster::raster, data_source = flyio_get_datasource(), 26 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), delete_file = FALSE, show_progress = FALSE, ...){ 27 | 28 | # checking if the file is valid 29 | # assert_that(tools::file_ext(file) %in% c("tif", "hdf"), msg = "Please input a valid path") 30 | if(data_source == "local"){ 31 | t = FUN(file, ...) 32 | return(t) 33 | } 34 | # a tempfile with the required extension 35 | temp <- paste0(dir, "/", basename(file)) 36 | if(isTRUE(delete_file)){on.exit(unlink(temp))} 37 | # downloading the file 38 | file = gsub("\\/+","/",file) 39 | downlogical = import_file(bucketpath = file, localfile = temp, bucket = bucket, show_progress = show_progress) 40 | assert_that(is.character(downlogical), msg = "Downloading of file failed") 41 | # loading the file into memory using the user-defined function 42 | result = FUN(temp, ...)
43 | return(result) 44 | } 45 | 46 | -------------------------------------------------------------------------------- /R/export_table.R: -------------------------------------------------------------------------------- 1 | #' Write csv, Excel files, txt 2 | #' 3 | #' @param x variable name 4 | #' @param file path of the file to be written to 5 | #' @param FUN the function using which the file is to write 6 | #' @param data_source the name of the data source, if not set globally. s3, gcs or local 7 | #' @param bucket the name of the bucket, if not set globally 8 | #' @param dir the directory to store intermediate files 9 | #' @param delete_file logical. to delete the file to be uploaded 10 | #' @param show_progress logical. Shows progress of the upload operation. 11 | #' @param ... other parameters for the FUN function defined above 12 | 13 | #' 14 | #' @return No output 15 | #' @export "export_table" 16 | #' @examples 17 | #' # for data on local 18 | #' export_table(iris, paste0(tempdir(), "/iris.csv"), FUN = write.csv, data_source = "local") 19 | #' \dontrun{ 20 | #' # for data on cloud 21 | #' flyio_set_datasource("gcs") 22 | #' flyio_set_bucket("your-bucket-name") 23 | #' export_table(iris, "iris.csv", write.csv, dir = tempdir()) 24 | #' } 25 | 26 | export_table <- function(x, file, FUN = data.table::fwrite, data_source = flyio_get_datasource(), 27 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), delete_file = TRUE, show_progress = FALSE, ...){ 28 | 29 | # checking if the file is valid 30 | #assert_that(tools::file_ext(file) %in% c("csv", "xlsx", "xls", "txt"), msg = "Please input a valid path") 31 | if(data_source == "local"){ 32 | t = FUN(x, file, ...) 33 | return(invisible(t)) 34 | } 35 | # a tempfile with the required extension 36 | if(isTRUE(delete_file)){ 37 | temp <- tempfile(fileext = paste0(".",tools::file_ext(file)), tmpdir = dir) 38 | on.exit(unlink(temp)) 39 | } else { 40 | temp <- paste0(dir, "/", basename(file)) 41 | } 42 | # loading the file to the memory using user defined function 43 | file = gsub("\\/+","/",file) 44 | FUN(x, temp, ...) 45 | # downloading the file 46 | export_file(localfile = temp, bucketpath = file, data_source = data_source, bucket = bucket, show_progress = show_progress) 47 | 48 | } 49 | 50 | -------------------------------------------------------------------------------- /R/import_table.R: -------------------------------------------------------------------------------- 1 | #' Read csv, Excel files, txt 2 | #' @description Read tabular data from anywhere using a function defined by you 3 | #' 4 | #' @param file path of the file to be read 5 | #' @param FUN the function using which the file is to be read 6 | #' @param data_source the name of the data source, if not set globally. s3, gcs or local 7 | #' @param bucket the name of the bucket, if not set globally 8 | #' @param dir the directory to store intermediate files 9 | #' @param delete_file logical. to delete the file downloaded 10 | #' @param show_progress logical. Shows progress of the download operation 11 | #' @param ... 
other parameters for the FUN function defined above 12 | #' 13 | #' @export "import_table" 14 | #' @return the output of the FUN function 15 | #' 16 | #' @examples 17 | #' # for data on local 18 | #' filepath = system.file("extdata", "mtcars.csv", package = "flyio") 19 | #' data = import_table(filepath, FUN = read.csv, data_source = "local") 20 | #' \dontrun{ 21 | #' # for data on cloud 22 | #' flyio_set_datasource("gcs") 23 | #' flyio_set_bucket("your-bucket-name") 24 | #' data = import_table("excel-file-on-gcs.xlsx", read_excel, dir = tempdir()) 25 | #' } 26 | 27 | import_table <- function(file, FUN = data.table::fread, data_source = flyio_get_datasource(), 28 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), delete_file = TRUE, show_progress = FALSE, ...){ 29 | # checking if the file is valid 30 | #assert_that(tools::file_ext(file) %in% c("csv", "xlsx", "xls", "txt"), msg = "Please input a valid path") 31 | if(data_source == "local"){ 32 | t = FUN(file, ...) 33 | return(t) 34 | } 35 | # a tempfile with the required extension 36 | temp <- paste0(dir, "/", basename(file)) 37 | if(isTRUE(delete_file)){on.exit(unlink(temp))} 38 | # downloading the file 39 | file = gsub("\\/+","/",file) 40 | downlogical = import_file(bucketpath = file, localfile = temp, data_source = data_source, bucket = bucket, show_progress = show_progress) 41 | assert_that(is.character(downlogical), msg = "Downloading of file failed") 42 | # loading the file to the memory using user defined function 43 | result = FUN(temp, ...) 44 | return(result) 45 | } 46 | 47 | -------------------------------------------------------------------------------- /R/flyio_bucket.R: -------------------------------------------------------------------------------- 1 | #' Set global bucket name for flyio 2 | #' @description Set global bucket name to be used for all the functions in flyio 3 | #' @param bucket the bucket name to be set 4 | #' @param data_source the data source used for I/O. Default chooses the data source set using flyio_set_datasource() 5 | #' 6 | #' @return stores the bucket name in a global environment under flyioBucketGcs or flyioBucketS3 7 | #' @export "flyio_set_bucket" 8 | #' @import "stringr" 9 | #' 10 | #' @examples flyio_set_bucket(bucket = "your-bucket-name", data_source = "S3") 11 | flyio_set_bucket <- function(bucket, data_source = flyio_get_datasource()){ 12 | data_source = str_to_title(data_source) 13 | assert_that(data_source %in% c("Gcs", "S3", "Local"), msg = "Enter a valid data source") 14 | if(data_source == "Local"){ 15 | message("For local data source, bucket name not required") 16 | return(invisible(bucket)) 17 | } 18 | assert_that(is.string(bucket) && bucket != "", msg = "Enter a valid bucket name") 19 | if(data_source == "Gcs"){ 20 | Sys.setenv("flyioBucketGcs" = bucket) 21 | message("Default bucket name for ",data_source ," set to '",bucket,"'") 22 | } else if(data_source == "S3"){ 23 | Sys.setenv("flyioBucketS3" = bucket) 24 | message("Default bucket name for ",data_source ," set to '",bucket,"'") 25 | } 26 | 27 | } 28 | 29 | #' Get global bucket name for flyio 30 | #' @description Get global bucket name to be used for all the functions in flyio 31 | #' @param data_source the data source used for I/O. 
Default chooses the data source set using flyio_set_datasource() 32 | #' @return the string - bucket name stored 33 | #' @details if the data source is local, then an empty string is returned 34 | #' @export "flyio_get_bucket" 35 | #' @import "stringr" 36 | #' 37 | #' @examples 38 | #' # first setting the bucket for a data source 39 | #' flyio_set_bucket(bucket = "socialcops-test", data_source = "S3") 40 | #' # retrieving the bucket for S3 41 | #' flyio_get_bucket(data_source = "S3") 42 | flyio_get_bucket <- function(data_source = flyio_get_datasource()){ 43 | assert_that(str_to_lower(data_source) %in% c("gcs", "s3", "local"), msg = "Enter a valid data source") 44 | data_source = str_to_title(data_source) 45 | if(data_source == "Local") return("") 46 | bucket = Sys.getenv(paste0("flyioBucket",data_source)) 47 | invisible(assert_that(is.string(bucket) && bucket != "", msg = "No bucket set. Use flyio_set_bucket to set the bucket name globally.")) 48 | return(bucket) 49 | } 50 | -------------------------------------------------------------------------------- /R/flyio_dir.R: -------------------------------------------------------------------------------- 1 | #' Set global directory for flyio to store data 2 | #' 3 | #' @description Set global directory where flyio functions will download intermediate files 4 | #' @param dir the directory to store intermediate files 5 | #' @return stores the directory in a global environment under CLOUD_DIR 6 | #' @export "flyio_set_dir" 7 | #' @import "assertthat" 8 | #' 9 | #' @examples flyio_set_dir(dir = tempdir()) 10 | flyio_set_dir <- function(dir = paste0(tempdir(), "/flyio",Sys.getpid())){ 11 | if(dir == paste0(tempdir(), "/flyio",Sys.getpid()) & !dir.exists(paste0(tempdir(), "/flyio",Sys.getpid()))){ 12 | dir.create(paste0(tempdir(), "/flyio",Sys.getpid())) 13 | } 14 | assert_that(is.dir(dir), msg = "Enter a valid directory name") 15 | Sys.setenv("CLOUD_DIR" = normalizePath(dir, mustWork = FALSE)) 16 | message("Default directory name for flyio set to '",dir,"'") 17 | } 18 | 19 | #' Get global directory for flyio 20 | #' @description Get global directory where flyio functions will download intermediate files 21 | #' @return the string - directory name 22 | #' @details if the directory is not set using flyio_set_dir(), it will return paste0(tempdir(), "/flyio", Sys.getpid()) 23 | #' @export "flyio_get_dir" 24 | #' 25 | #' @examples 26 | #' flyio_get_dir() 27 | flyio_get_dir <- function(){ 28 | dir = Sys.getenv("CLOUD_DIR") 29 | tmpdir = normalizePath(tempdir(), mustWork = FALSE) 30 | if(dir == ""){ 31 | dir = paste0(tmpdir, "/flyio",Sys.getpid()) 32 | } 33 | if(dir == paste0(tmpdir, "/flyio",Sys.getpid()) & !dir.exists(paste0(tmpdir, "/flyio",Sys.getpid()))){ 34 | dir.create(paste0(tmpdir, "/flyio",Sys.getpid())) 35 | } 36 | return(dir) 37 | } 38 | 39 | #' List files in flyio tmp folder 40 | #' @description Get the list of files downloaded by flyio in the default tmp folder 41 | #' @return the string - file names 42 | #' @export "flyio_list_dir" 43 | #' 44 | #' @examples 45 | #' flyio_list_dir() 46 | flyio_list_dir <- function(){ 47 | tmpdir = normalizePath(tempdir(), mustWork = FALSE) 48 | dir = paste0(tmpdir, "/flyio",Sys.getpid()) 49 | return(list.files(dir)) 50 | } 51 | 52 | #' Delete files in flyio tmp folder 53 | #' @description Delete the list of files downloaded by flyio in the default tmp folder 54 | #' @return files deleted 55 | #' @export "flyio_remove_dir" 56 | #' 57 | #' @examples 58 | #' flyio_remove_dir() 59 | flyio_remove_dir <- function(){ 60 |
tmpdir = normalizePath(tempdir(), mustWork = FALSE) 61 | dir = paste0(tmpdir, "/flyio",Sys.getpid()) 62 | message("Deleting ", length(flyio_list_dir()), " files...") 63 | do.call(file.remove, list(list.files(dir, full.names = TRUE))) 64 | } 65 | 66 | 67 | -------------------------------------------------------------------------------- /R/export_file.R: -------------------------------------------------------------------------------- 1 | #' Upload a file from the local system to cloud 2 | #' @description Write a local file to the cloud, S3 or GCS 3 | #' @param localfile path of the file to be uploaded 4 | #' @param bucketpath path where the file needs to be uploaded, the file name can or cannot be present 5 | #' @param data_source the name of the data source, if not set globally. gcs or s3 6 | #' @param bucket the name of the bucket, if not set globally 7 | #' @param show_progress logical. Shows progress of the upload operation. 8 | #' @param ... other parameters for gcs_upload or aws.s3::put_object 9 | #' 10 | #' @export "export_file" 11 | #' @return the filename and path of the file in the bucket 12 | #' @import "googleCloudStorageR" "aws.s3" "assertthat" 13 | #' @examples 14 | #' \dontrun{ 15 | #' flyio_set_datasource("gcs") 16 | #' flyio_set_bucket("your-bucket-name") 17 | #' export_file("file-local.csv", "file-on-cloud.csv") 18 | #' } 19 | 20 | export_file <- function(localfile, bucketpath, data_source = flyio_get_datasource(), 21 | bucket = flyio_get_bucket(data_source), show_progress = FALSE, ...){ 22 | # Starting data checks -- 23 | ## valid inputs 24 | ## valid inputs 25 | assert_that(is.character(localfile), 26 | is.character(bucketpath)) 27 | 28 | ## data source should be either GCS or S3 29 | assert_that(str_to_lower(data_source)%in%c("gcs","s3"), 30 | msg = "Data source should be either GCS or S3") 31 | data_source = str_to_lower(data_source) 32 | 33 | ## file to upload exists 34 | assert_that(file.exists(localfile), msg = "Please enter a valid local path to a file") 35 | 36 | ## file extensions for both the paths are same 37 | if(tools::file_ext(localfile) != tools::file_ext(bucketpath)){ 38 | bucketpath = gsub("\\/+","/",paste0(bucketpath,"/",basename(localfile))) 39 | } 40 | 41 | ## its not a folder and only a file to upload 42 | assert_that(!is.dir(localfile), 43 | msg = "Cannot upload a folder. Make sure its a file.") 44 | 45 | # upload the file if everything is fine 46 | bucketpath = gsub("\\/+","/",bucketpath) 47 | if(data_source == "gcs"){ 48 | upload_return = gcs_upload(file = localfile, name = bucketpath, bucket = bucket, ...) 49 | } else if(data_source == "s3"){ 50 | l <- list(...) 51 | if(is.null(l$multipart)){ 52 | upload_return = aws.s3::put_object(file = localfile, bucket = bucket, object = bucketpath, multipart = TRUE, check_region = FALSE, ...) 53 | } else{ 54 | upload_return = aws.s3::put_object(file = localfile, bucket = bucket, object = bucketpath, check_region = FALSE, show_progress = show_progress, ...) 
55 | } 56 | 57 | } 58 | return(invisible(bucketpath)) 59 | } 60 | 61 | -------------------------------------------------------------------------------- /R/import_shp.R: -------------------------------------------------------------------------------- 1 | #' Read shapefiles 2 | #' @description Read shapefiles data from anywhere using a function defined by you 3 | #' @param pathshp path of the shp file to be read 4 | #' @param FUN the function using which the file is to be read 5 | #' @param dsnlayerbind if the FUN needs dsn and layer binded or not 6 | #' @param data_source the name of the data source, if not set globally. s3, gcs or local 7 | #' @param bucket the name of the bucket, if not set globally 8 | #' @param dir the directory to store intermediate files 9 | #' @param delete_file logical. to delete the file downloaded 10 | #' @param show_progress logical. Shows progress of the download operation 11 | #' @param ... other parameters for the FUN function defined above 12 | #' @export "import_shp" 13 | #' @return the output of the FUN function 14 | #' 15 | #' @examples 16 | #' \dontrun{ 17 | #' # import shapefile from Google Cloud 18 | #' flyio_set_datasource("gcs") 19 | #' flyio_set_bucket("your-bucket-name") 20 | #' t = import_shp("shptest-on-cloud.shp", FUN = readOGR, dsnlayerbind = F, dir = tempdir()) 21 | #' t = import_shp("shptest-on-cloud.shp", FUN = raster::shapefile, dsnlayerbind = T, dir = tempdir()) 22 | #' } 23 | 24 | import_shp <- function(pathshp, FUN = rgdal::readOGR, dsnlayerbind = F, data_source = flyio_get_datasource(), 25 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), delete_file = TRUE, show_progress = FALSE, ...){ 26 | filename = basename(pathshp) 27 | layer = gsub(paste0("\\.",tools::file_ext(pathshp),"$"), "", filename) 28 | dsn = gsub(paste0(filename,"$"),"", pathshp) 29 | dsnlayer = pathshp 30 | shpfiles = list_files(path = dsn, pattern = paste0(layer,"."), data_source = data_source, bucket = bucket) 31 | shpfiles = grep("dbf|prj|shp|shx|cpg|qpj", shpfiles, value = T) 32 | if(data_source == "local"){ 33 | if(!isTRUE(dsnlayerbind)){ 34 | result = FUN(dsn, layer, ...) 35 | } else { 36 | result = FUN(paste0(dsn, "/",layer), ...) 37 | } 38 | return(result) 39 | } 40 | # downloading the file 41 | for(i in shpfiles){ 42 | # a tempfile with the required extension 43 | temp <- paste0(dir, "/", paste0(layer,"."),tools::file_ext(i)) 44 | if(isTRUE(delete_file)){on.exit(unlink(temp))} 45 | # downloading the file 46 | downlogical = import_file(bucketpath = i, localfile = temp, 47 | data_source = data_source, bucket = bucket, overwrite = T, show_progress = show_progress) 48 | } 49 | # loading the file to the memory using user defined function 50 | if(!isTRUE(dsnlayerbind)){ 51 | result = FUN(dir, layer, ...) 52 | } else { 53 | result = FUN(paste0(dir, "/",layer), ...) 54 | } 55 | return(result) 56 | } 57 | 58 | -------------------------------------------------------------------------------- /R/export_shp.R: -------------------------------------------------------------------------------- 1 | #' Write shapefiles 2 | #' 3 | #' @param obj R object to be written 4 | #' @param pathshp the path of the shapefile, which may or may not include the extension 5 | #' @param FUN the function using which the file is to be read 6 | #' @param dsnlayerbind if the FUN needs dsn and layer binded or not 7 | #' @param data_source the name of the data source, if not set globally. 
s3, gcs or local 8 | #' @param bucket the name of the bucket, if not set globally 9 | #' @param dir the directory to store intermediate files 10 | #' @param delete_file logical. to delete the file to be uploaded 11 | #' @param show_progress logical. Shows progress of the upload operation. 12 | #' @param ... other parameters for the FUN function defined above 13 | #' @export "export_shp" 14 | #' @return output of the FUN function if any 15 | #' 16 | #' @examples 17 | #' \dontrun{ 18 | #' # Save shapefile on Google Cloud 19 | #' flyio_set_datasource("gcs") 20 | #' flyio_set_bucket("your-bucket-name") 21 | #' export_shp(your-shp, "your-shp.shp", driver = "ESRI Shapefile", overwrite = T, dir = tempdir()) 22 | #' } 23 | 24 | 25 | export_shp <- function(obj, pathshp, FUN = rgdal::writeOGR, dsnlayerbind = F, data_source = flyio_get_datasource(), 26 | bucket = flyio_get_bucket(data_source), dir = flyio_get_dir(), delete_file = TRUE, show_progress = FALSE, ...){ 27 | filename = basename(pathshp) 28 | layer = gsub(paste0("\\.",tools::file_ext(pathshp),"$"), "", filename) 29 | dsn = gsub(paste0(filename,"$"),"", pathshp) 30 | dsnlayer = pathshp 31 | l <- list(...) 32 | if(missing(FUN) & is.null(l$driver)){ 33 | FUN1 <- function(...){ 34 | FUN(..., driver = "ESRI Shapefile") 35 | } 36 | } else{ 37 | FUN1 = FUN 38 | } 39 | if(data_source == "local"){ 40 | if(dsnlayerbind == F){ 41 | result = FUN1(obj, dsn, layer, ...) 42 | } else{ 43 | result = FUN1(obj, dsnlayer, ...) 44 | } 45 | return(invisible(result)) 46 | } 47 | if(dsnlayerbind == F){ 48 | result = FUN1(obj, dir, layer, ...) 49 | } else{ 50 | tmplayer = gsub("\\/+","/", paste0(dir,"/",layer,".shp")) 51 | result = FUN1(obj, tmplayer, ...) 52 | } 53 | shpfiles = list.files(path = dir, pattern = paste0(layer,".")) 54 | shpfiles = grep("dbf|prj|shp|shx|cpg|qpj", shpfiles, value = T) 55 | # downloading the file 56 | for(i in shpfiles){ 57 | # a tempfile with the required extension 58 | temp <- paste0(dir, "/", i) 59 | if(isTRUE(delete_file)){on.exit(unlink(temp))} 60 | # uploading the file 61 | dsnlayer_i = gsub(paste0("\\.",tools::file_ext(dsnlayer),"$"), "", dsnlayer) 62 | downlogical = export_file(localfile = temp, bucketpath = paste0(dsnlayer_i, ".", tools::file_ext(i)), 63 | bucket = bucket, data_source = data_source, show_progress = show_progress) 64 | } 65 | } 66 | 67 | -------------------------------------------------------------------------------- /R/import_file.R: -------------------------------------------------------------------------------- 1 | 2 | #' Download file from cloud to local system 3 | #' @description Save a single file from the cloud to your local drive 4 | #' @param bucketpath path of file in the bucket 5 | #' @param localfile path where the file needs to be downloaded. The file name and extension also need to be present; if not, the current file name will be considered 6 | #' @param data_source the name of the data source, if not set globally, gcs or s3 7 | #' @param bucket the name of the bucket, if not set globally 8 | #' @param overwrite logical. If the files should be overwritten if already present 9 | #' @param show_progress logical. Shows progress of the download operation 10 | #' @param ... 
other parameters for gcs_get_object or save_object 11 | #' 12 | #' @return the filename and path of the object saved to local 13 | #' @export "import_file" 14 | #' @import "googleCloudStorageR" "aws.s3" "stringr" "assertthat" 15 | #' @examples 16 | #' \dontrun{ 17 | #' # import data from GCS to Local 18 | #' flyio_set_datasource("gcs") 19 | #' flyio_set_bucket("your-bucket-name") 20 | #' import_file("mtcars.csv", paste0(tempdir(), "/mtcars.csv"), overwrite = T) 21 | #' } 22 | 23 | import_file <- function(bucketpath, localfile, data_source = flyio_get_datasource(), 24 | bucket = flyio_get_bucket(data_source), overwrite = TRUE, show_progress = FALSE, ...){ 25 | # Starting data checks -- 26 | ## valid inputs 27 | assert_that(is.character(localfile), 28 | is.character(bucketpath)) 29 | 30 | ## data source should be either GCS or S3 31 | assert_that(str_to_lower(data_source)%in%c("gcs","s3"), 32 | msg = "Data source should be either GCS or S3") 33 | data_source = str_to_lower(data_source) 34 | 35 | ## file to upload exists 36 | assert_that(file_exists(bucketpath, bucket = bucket, data_source = data_source), 37 | msg = "Please enter a valid bucket path to a file") 38 | 39 | ## file extensions for both the paths are same 40 | if(tools::file_ext(localfile) != tools::file_ext(bucketpath)){ 41 | is.dir(localfile) 42 | localfile = gsub("\\/+","/",paste0(localfile,"/",basename(bucketpath))) 43 | } 44 | 45 | ## its not a folder and only a file to upload 46 | assert_that(tools::file_ext(localfile) != "" & tools::file_ext(bucketpath) != "", 47 | msg = "Cannot upload a folder. Make sure its a file.") 48 | 49 | # upload the file if everything is fine 50 | bucketpath = gsub("\\/+","/",bucketpath) 51 | if(data_source == "gcs"){ 52 | save_file = gcs_get_object(object_name = bucketpath,bucket = bucket, saveToDisk = localfile, overwrite = overwrite, ...) 53 | } else if(data_source == "s3"){ 54 | save_file = aws.s3::save_object(object = bucketpath, bucket = bucket, file = localfile, overwrite = overwrite, check_region = FALSE, show_progress = show_progress, ...) 55 | } 56 | return(invisible(localfile)) 57 | } 58 | -------------------------------------------------------------------------------- /R/list_files.R: -------------------------------------------------------------------------------- 1 | #' List the Files in a Directory/Folder 2 | #' @description list the files in cloud or locally - similar to list.files() 3 | #' @param path the folder for which the files need to be listed 4 | #' @param pattern an optional regular expression. Only file path names that match the regular expression will be returned. 5 | #' @param recursive logical. Should the listing recurse into directories? 6 | #' @param ignore.case logical. Should pattern-matching be case-insensitive? 7 | #' @param full.names logical. Should the entire path be returned or only after the path inputed? 8 | #' @param data_source the name of the data source, gcs, s3 or local; if not set globally 9 | #' @param bucket the name of the bucket, if not set globally 10 | #' @param check_region logical. 
to check region for aws.s3 11 | #' 12 | #' @export "list_files" 13 | #' @return a vector of full file names 14 | #' @import "googleCloudStorageR" "aws.s3" "stringr" 15 | #' @examples 16 | #' # List files locally 17 | #' list_files(path = tempdir(), data_source = "local") 18 | #' \dontrun{ 19 | #' # List files on S3 20 | #' flyio_set_datasource("s3") 21 | #' flyio_set_bucket("your-bucket-name") 22 | #' list_files(path = "tests/", pattern = ".*csv") 23 | #' } 24 | 25 | list_files <- function(path = "", pattern = NULL, recursive = FALSE, 26 | ignore.case = FALSE, full.names = TRUE, 27 | data_source = flyio_get_datasource(), bucket = flyio_get_bucket(data_source), check_region = FALSE){ 28 | assert_that(is.character(path)) 29 | assert_that(str_to_lower(data_source) %in% c("gcs", "s3", "local"), msg = "Input a valid data source") 30 | data_source = str_to_lower(data_source) 31 | 32 | if(data_source == "local"){ 33 | return(list.files(path = path, pattern = pattern, recursive = recursive, ignore.case = ignore.case,full.names = full.names)) 34 | } 35 | 36 | # getting the vector of all the filenames, with path as prefix 37 | path = gsub("\\/+","/",path) 38 | path = gsub("^\\/|^\\.\\/|^\\.","",path) 39 | if(data_source == "gcs"){ 40 | obj = gcs_list_objects(bucket = bucket, detail = "summary", prefix = path)$name 41 | } else if(data_source == "s3"){ 42 | obj = unname(unlist(lapply(get_bucket(bucket = bucket, prefix = path, max = Inf,check_region = check_region), `[[`, 1))) 43 | } 44 | 45 | # if pattern is provided 46 | if(!is.null(pattern)){ 47 | # look for the pattern in the file paths 48 | subsetpattern = grep(pattern, obj, ignore.case = ignore.case) 49 | if(length(subsetpattern)>0){ 50 | obj = obj[subsetpattern] 51 | } else{ 52 | obj = character(0) 53 | } 54 | } 55 | # if not recursive delete all which are in other folder 56 | if(!isTRUE(recursive)){ 57 | obj = obj[grep(paste0("^",path,".{1}[A-Za-z0-9_,\\s-]+[.]{1}[A-Za-z]{1+}$"), obj)] 58 | } 59 | if(!isTRUE(full.names)){ 60 | obj = sub(paste0("^",path),"",obj) 61 | obj = gsub("^.*?\\/","", obj) 62 | } 63 | return(obj) 64 | } 65 | 66 | 67 | -------------------------------------------------------------------------------- /R/export_folder.R: -------------------------------------------------------------------------------- 1 | #' Upload a folder from the local system to cloud 2 | #' @description Write a local folder to the cloud, S3 or GCS 3 | #' @param localfolder path of the folder in which all the files are to be uploaded 4 | #' @param pattern pattern of the file names in the folder to be uploaded 5 | #' @param overwrite if files need to be overwritten (if already present) 6 | #' @param bucketpath path of the folder in which the files are to be uploaded 7 | #' @param data_source the name of the data source, if not set globally. can be gcs or s3 8 | #' @param bucket the name of the bucket, if not set globally 9 | #' @param show_progress logical. Shows progress of the upload operation. 10 | #' @param ... 
other parameters for gcs/s3 upload 11 | #' 12 | #' @export "export_folder" 13 | #' @return the filename and path of the file in the bucket 14 | #' @import "googleCloudStorageR" "aws.s3" "assertthat" 15 | #' @examples 16 | #' \dontrun{ 17 | #' flyio_set_datasource("gcs") 18 | #' flyio_set_bucket("your-bucket-name") 19 | #' export_folder("folder-local/", "folder-on-cloud/") 20 | #' } 21 | 22 | export_folder <- function(localfolder, bucketpath, pattern = "*", overwrite = TRUE, data_source = flyio_get_datasource(), 23 | bucket = flyio_get_bucket(data_source), show_progress = FALSE, ...){ 24 | # Starting data checks -- 25 | ## valid inputs 26 | assert_that(is.character(localfolder), 27 | is.character(bucketpath)) 28 | 29 | ## data source should be either GCS or S3 30 | assert_that(str_to_lower(data_source)%in%c("gcs","s3"), 31 | msg = "Data source should be either GCS or S3") 32 | data_source = str_to_lower(data_source) 33 | 34 | ## its not a folder and only a file to upload 35 | assert_that(tools::file_ext(localfolder) == "" & tools::file_ext(bucketpath) == "", 36 | msg = "Cannot upload a file. Make sure its a folder") 37 | 38 | ## file to upload exists 39 | file_upload = list.files(localfolder, pattern = pattern, full.names = T, recursive = T) 40 | assert_that(length(file_upload)>0, msg = "Please enter a valid local folder with files") 41 | 42 | #removing extra / 43 | bucketpath = gsub("\\/+","/",bucketpath) 44 | 45 | # if overwrite or not 46 | if(!isTRUE(overwrite)){ 47 | #removing extra / 48 | file_upload = gsub("\\/+","/",file_upload) 49 | localfolder = gsub("\\/+","/",localfolder) 50 | 51 | bucketfiles = list_files(path = bucketpath, pattern = pattern, recursive = T, 52 | data_source = data_source, bucket = bucket) 53 | bucketfiles = gsub(paste0("^",bucketpath), "", bucketfiles, fixed = T) 54 | bucketfiles = gsub("^\\/+","",bucketfiles) 55 | localfiles = gsub(paste0("^",localfolder), "", file_upload, fixed = T) 56 | localfiles = gsub("^\\/+","",localfiles) 57 | commonfiles = which(localfiles %in% bucketfiles) 58 | if(length(commonfiles)>0){ 59 | file_upload = file_upload[-commonfiles,] 60 | assert_that(length(file_upload)>0, msg = "All files already exists") 61 | } 62 | } 63 | 64 | # upload the file if everything is fine 65 | pb <- txtProgressBar(min = 0, max = length(file_upload), style = 3) 66 | for(i in 1:length(file_upload)){ 67 | bucketpath = gsub("\\/+$","",bucketpath) 68 | if(data_source == "gcs"){ 69 | upload_return = gcs_upload(file = file_upload[i], name = paste0(bucketpath,"/", basename(file_upload[i])), bucket = bucket, ...) 70 | } else if(data_source == "s3"){ 71 | upload_return = aws.s3::put_object(file = file_upload[i], bucket = bucket, object = paste0(bucketpath,"/", basename(file_upload[i])), show_progress = show_progress, ...) 72 | } 73 | setTxtProgressBar(pb, i) 74 | } 75 | close(pb) 76 | return(invisible(bucketpath)) 77 | } 78 | 79 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # flyio - Make data fly to R 2 | Input and output data from R — download, upload, read and write objects from AWS S3, GoogleCloudStorage or local file system from a single interface. 3 | 4 | ![](http://www.r-pkg.org/badges/version/flyio) ![](https://cranlogs.r-pkg.org/badges/flyio) 5 | 6 | ## Overview 7 | 8 | **flyio** provides a common interface to interact with data from cloud storage providers or local storage directly from R. 
It currently supports AWS S3 and Google Cloud Storage, thanks to the API wrappers provided by cloudyr. **flyio** also supports reading and writing tables, rasters, shapefiles and R objects to the data source from memory. 9 | 10 | 11 | 12 | - `flyio_set_datasource()`: Set the data source (GCS, S3 or local) for all the other functions in flyio. 13 | - `flyio_auth()`: Authenticate a data source (GCS or S3) so that you have access to the data. Different data sources can be authenticated in a single session. 14 | - `flyio_set_bucket()`: Set the bucket name once for either or both data sources so that you don't need to pass it to each function. 15 | - `list_files()`: List the files in the bucket/folder. 16 | - `file_exists()`: Check if a file exists in the bucket/folder. 17 | - `export_[file/folder]()`: Upload a file/folder to S3 or GCS from R. 18 | - `import_file()`: Download a file from S3 or GCS. 19 | - `import_[table/raster/stack/shp/rds/rda/st]()`: Read a file from the set data source and bucket using a user-defined function. 20 | - `export_[table/raster/shp/rds/rda/st]()`: Write a file to the set data source and bucket using a user-defined function. 21 | 22 | For global usage, the data source, authentication keys and bucket can be set as environment variables on the machine so that they do not have to be supplied in every call (see the snippet at the end of this README). 23 | - For the data source: `CLOUD_STORAGE_NAME` 24 | - For the bucket name: `flyioBucketS3` or `flyioBucketGcs` 25 | - For authentication: `GCS_AUTH_FILE` or `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_DEFAULT_REGION` (for AWS S3, if the AWS CLI is already authenticated, this step is not needed) 26 | 27 | ## Installation 28 | 29 | ``` r 30 | # Install the stable version from CRAN: 31 | install.packages("flyio") 32 | 33 | # Install the latest dev version from GitHub: 34 | install.packages("devtools") 35 | devtools::install_github("atlanhq/flyio") 36 | 37 | # Load the library 38 | library(flyio) 39 | ``` 40 | If you encounter a bug, please file an issue on GitHub with steps to reproduce it. Use the same channel for feature requests, enhancements or suggestions. 41 | 42 | ### Example 43 | 44 | ``` r 45 | # Set the data source 46 | flyio_set_datasource("gcs") 47 | 48 | # Verify that the data source is set 49 | flyio_get_datasource() 50 | 51 | # Authenticate the default data source and set the bucket 52 | flyio_auth("key.json") 53 | flyio_set_bucket("atlanhq-flyio") 54 | 55 | # Authenticate S3 as well 56 | flyio_auth(c("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION", "AWS_SESSION_TOKEN"), data_source = "s3") 57 | flyio_set_bucket("atlanhq-flyio", data_source = "s3") 58 | 59 | # List the files in GCS 60 | list_files(path = "test", pattern = "*csv") 61 | 62 | # Save mtcars to each data source using the default write function 63 | export_table(mtcars, "~/Downloads/mtcars.csv", data_source = "local") 64 | export_table(mtcars, "test/mtcars.csv") # saving to GCS; the data source need not be mentioned as it is set globally 65 | export_table(mtcars, "test/mtcars.csv", data_source = "s3") 66 | 67 | # Check that the file written exists in GCS 68 | file_exists("test/mtcars.csv") 69 | 70 | # Read the file from GCS using the readr library 71 | mtcars <- import_table("test/mtcars.csv", FUN = readr::read_csv) 72 | 73 | ``` 74 | 75 | ## References 76 | * Cloudyr GCS wrapper: https://github.com/cloudyr/googleCloudStorageR 77 | * Cloudyr S3 wrapper: https://github.com/cloudyr/aws.s3 78 | 79 |
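As mentioned in the overview, flyio can pick up its defaults from environment variables. A minimal sketch of setting them from R (the bucket names, key path and credentials below are placeholders — substitute your own values, or put the same variables in `~/.Renviron`):

``` r
# Hypothetical values: replace with your own buckets, key file and credentials
Sys.setenv(
  CLOUD_STORAGE_NAME    = "gcs",                 # default data source picked up by flyio
  flyioBucketGcs        = "your-gcs-bucket",     # default GCS bucket
  flyioBucketS3         = "your-s3-bucket",      # default S3 bucket
  GCS_AUTH_FILE         = "/path/to/key.json",   # GCS service account key
  AWS_ACCESS_KEY_ID     = "your-access-key-id",
  AWS_SECRET_ACCESS_KEY = "your-secret-access-key",
  AWS_DEFAULT_REGION    = "us-east-1"
)
```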

80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /R/flyio_auth.R: -------------------------------------------------------------------------------- 1 | #' Authenticate flyio 2 | #' @description Authenticate any of the cloud storage platforms to perform any I/O 3 | #' @param auth_list path to the json file or the system environment name in case of gcs. For s3 a vector for access_key, secret_access_key, region (optional; default us-east-1) and session_id (optional); this could also be a single comma-separated string. If left blank, then for s3 it will pick from ~/.aws/credentials file 4 | #' @param data_source default to local. Possible options : gcs, s3, local. Case insensitive 5 | #' @param scope the scope of the auth if gcs. Default: https://www.googleapis.com/auth/devstorage.full_control 6 | #' @param awsprofile if auth_list = "", which profile to use from ~/.aws/credentials 7 | #' @export "flyio_auth" 8 | #' @import "googleCloudStorageR" "stringr" "aws.s3" "assertthat" "utils" "tools" 9 | #' @examples 10 | #' flyio_set_datasource("local") 11 | #' flyio_auth() 12 | #' 13 | 14 | 15 | flyio_auth <- function(auth_list = "", data_source = flyio_get_datasource(), 16 | scope = "https://www.googleapis.com/auth/devstorage.full_control", 17 | awsprofile = "default"){ 18 | 19 | # checking if data_source input is valid 20 | invisible(assertthat::assert_that(stringr::str_to_lower(data_source) %in% c("local", "gcs", "s3"), 21 | msg = "data_source should be either local, gcs or s3")) 22 | 23 | # if data source is local return 24 | if(str_to_lower(data_source) == "local"){ 25 | message("data_source is set to Local. No authetication required.") 26 | return(invisible(TRUE)) 27 | } 28 | 29 | # check the input for auth_list - split if comma present. 
30 | if(length(auth_list) == 1 & auth_list != ""){ 31 | auth_list = stringr::str_trim(unlist(strsplit(auth_list, ","))) 32 | } else{ 33 | invisible(assertthat::assert_that(!is.list(auth_list), msg = "Please input a vector in auth_list")) 34 | } 35 | 36 | # check if the inputs are system environments 37 | if(sum(auth_list %in% names(Sys.getenv())) == length(auth_list)){ 38 | auth_list = Sys.getenv(auth_list) 39 | } 40 | 41 | # running authentication for set data source 42 | if(str_to_lower(data_source) == "gcs"){ 43 | auth_response = .gcsAuth(auth_list[1], scope) 44 | } else if(str_to_lower(data_source) == "s3"){ 45 | auth_response = .s3Auth(auth_list, awsprofile) 46 | } 47 | auth_response = assertthat::assert_that(isTRUE(auth_response), msg = "Authentication Failed!") 48 | } 49 | 50 | # helper functions to authentical a cloud storage source 51 | .gcsAuth <- function(auth_list, scope){ 52 | tryCatch({ 53 | tryCatch({ 54 | googleCloudStorageR::gcs_auth(auth_list) 55 | message("GCS Authenticated!") 56 | return(TRUE) 57 | }, error = function(err){ 58 | options(googleAuthR.scopes.selected = scope) 59 | Sys.setenv("GCS_AUTH_FILE" =auth_list) 60 | googleCloudStorageR::gcs_auth() 61 | message("GCS Authenticated!") 62 | return(TRUE) 63 | })}, error = function(err){ 64 | return(FALSE) 65 | }) 66 | 67 | } 68 | .s3Auth <- function(auth_list, awsprofile = "default"){ 69 | if(auth_list == ""){ 70 | auth_list = .awscred_profile(profile = awsprofile) 71 | } 72 | invisible(assertthat::assert_that(length(auth_list)>=2, msg = "Input access key and secret key for S3")) 73 | auth_list <- switch (as.character(length(auth_list)), 74 | "2" = c(auth_list, "us-east-1", ""), 75 | "3" = c(auth_list, "") 76 | ) 77 | Sys.setenv("AWS_ACCESS_KEY_ID" = auth_list[1], 78 | "AWS_SECRET_ACCESS_KEY" = auth_list[2], 79 | "AWS_DEFAULT_REGION" = auth_list[3], 80 | "AWS_SESSION_TOKEN" = auth_list[4]) 81 | tryCatch({invisible(capture.output(t1 <- bucketlist())) 82 | message("AWS S3 Authenticated!"); return(TRUE)}, error = function(err){}) 83 | 84 | } 85 | 86 | .awscred_profile <- function(profile = "default"){ 87 | awscreds = readLines("~/.aws/credentials") 88 | defaultprofile = which(awscreds == paste0("[",profile,"]"))[1] 89 | if(is.na(defaultprofile)){ 90 | return(c(aws_access_key_id="", 91 | aws_secret_access_key="")) 92 | } 93 | 94 | aws_access_key_id = strsplit(awscreds[defaultprofile+1], " = ")[[1]][2] 95 | aws_secret_access_key = strsplit(awscreds[defaultprofile+2], " = ")[[1]][2] 96 | awsregion = readLines("~/.aws/config") 97 | defaultregion = which(awsregion == paste0("[profile ",profile,"]"))[1] 98 | if(profile=="default"){ 99 | defaultregion = which(awsregion == "[default]") 100 | } 101 | if(is.na(defaultregion)){ 102 | return(c(aws_access_key_id, aws_secret_access_key)) 103 | } 104 | if(!(grepl("\\[",awsregion[defaultregion+1]) | length(awsregion) < (defaultregion+1))){ 105 | region = strsplit(awsregion[defaultregion+1], " = ")[[1]][2] 106 | return(c(aws_access_key_id, aws_secret_access_key, region)) 107 | } else{ 108 | return(c(aws_access_key_id, aws_secret_access_key)) 109 | } 110 | 111 | } 112 | 113 | --------------------------------------------------------------------------------