Skip to content
This repository has been archived by the owner on Oct 31, 2018. It is now read-only.

mdsumner/scsf

Repository files navigation

Travis-CI Build Status AppVeyor Build Status Coverage Status

scsf

Convert simple features to a generic common form that is more general and can be used for a wide variety of data structures.

This is work in progress and at a very early stage. More to come.

Example - sf to ggplot2 round trip

library(sf)
#> Linking to GEOS 3.5.1, GDAL 2.2.1, proj.4 4.9.3
## a MULTIPOLYGON layer
nc = st_read(system.file("shape/nc.shp", package="sf"))
#> Reading layer `nc' from data source `/perm_storage/home/mdsumner/R/x86_64-pc-linux-gnu-library/3.4/sf/shape/nc.shp' using driver `ESRI Shapefile'
#> Simple feature collection with 100 features and 14 fields
#> geometry type:  MULTIPOLYGON
#> dimension:      XY
#> bbox:           xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
#> epsg (SRID):    4267
#> proj4string:    +proj=longlat +datum=NAD27 +no_defs

The common form is the entity tables, objects, paths, vertices and a link table to allow de-duplication of shared vertices. Currently this de-duplication is done on all coordinate fields, but for most applications it will usually be done only in X-Y.

library(scsf)
#> Loading required package: silicate
#> 
#> Attaching package: 'scsf'
#> The following objects are masked from 'package:silicate':
#> 
#>     inlandwaters, minimal_mesh
nc = st_read(system.file("gpkg/nc.gpkg", package="sf"))
#> Reading layer `nc.gpkg' from data source `/perm_storage/home/mdsumner/R/x86_64-pc-linux-gnu-library/3.4/sf/gpkg/nc.gpkg' using driver `GPKG'
#> Simple feature collection with 100 features and 14 fields
#> geometry type:  MULTIPOLYGON
#> dimension:      XY
#> bbox:           xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
#> epsg (SRID):    4267
#> proj4string:    +proj=longlat +datum=NAD27 +no_defs


(bmodel <- PATH(nc))
#> $object
#> # A tibble: 100 x 15
#>     AREA PERIMETER CNTY_ CNTY_ID        NAME   FIPS FIPSNO CRESS_ID BIR74
#>  * <dbl>     <dbl> <dbl>   <dbl>      <fctr> <fctr>  <dbl>    <int> <dbl>
#>  1 0.114     1.442  1825    1825        Ashe  37009  37009        5  1091
#>  2 0.061     1.231  1827    1827   Alleghany  37005  37005        3   487
#>  3 0.143     1.630  1828    1828       Surry  37171  37171       86  3188
#>  4 0.070     2.968  1831    1831   Currituck  37053  37053       27   508
#>  5 0.153     2.206  1832    1832 Northampton  37131  37131       66  1421
#>  6 0.097     1.670  1833    1833    Hertford  37091  37091       46  1452
#>  7 0.062     1.547  1834    1834      Camden  37029  37029       15   286
#>  8 0.091     1.284  1835    1835       Gates  37073  37073       37   420
#>  9 0.118     1.421  1836    1836      Warren  37185  37185       93   968
#> 10 0.124     1.428  1837    1837      Stokes  37169  37169       85  1612
#> # ... with 90 more rows, and 6 more variables: SID74 <dbl>, NWBIR74 <dbl>,
#> #   BIR79 <dbl>, SID79 <dbl>, NWBIR79 <dbl>, object <chr>
#> 
#> $path
#> # A tibble: 108 x 6
#>     ncol         type subobject   object     path ncoords_
#>    <int>        <chr>     <int>    <chr>    <chr>    <int>
#>  1     2 MULTIPOLYGON         1 83cc972d 4ceb1a79       27
#>  2     2 MULTIPOLYGON         1 4e6c51ba 8aa608c2       26
#>  3     2 MULTIPOLYGON         1 220da212 f385bee3       28
#>  4     2 MULTIPOLYGON         1 a9be497e 09d360de       26
#>  5     2 MULTIPOLYGON         2 a9be497e c6b44df2        7
#>  6     2 MULTIPOLYGON         3 a9be497e 12d320f3        5
#>  7     2 MULTIPOLYGON         1 aedde6f5 a9854148       34
#>  8     2 MULTIPOLYGON         1 5205473e 2e295f5f       22
#>  9     2 MULTIPOLYGON         1 e422b229 8bd3fc64       24
#> 10     2 MULTIPOLYGON         1 df2eab80 398f9e7c       17
#> # ... with 98 more rows
#> 
#> $vertex
#> # A tibble: 1,255 x 3
#>           x_       y_  vertex_
#>        <dbl>    <dbl>    <chr>
#>  1 -81.47276 36.23436 843f3357
#>  2 -81.54084 36.27251 5fdeb201
#>  3 -81.56198 36.27359 1c9f2fab
#>  4 -81.63306 36.34069 1870fd10
#>  5 -81.74107 36.39178 8111a974
#>  6 -81.69828 36.47178 8817bcb8
#>  7 -81.70280 36.51934 23f22fa4
#>  8 -81.67000 36.58965 94b4c37c
#>  9 -81.34530 36.57286 2d3cfd7d
#> 10 -81.34754 36.53791 53c92a50
#> # ... with 1,245 more rows
#> 
#> $path_link_vertex
#> # A tibble: 2,529 x 2
#>        path  vertex_
#>       <chr>    <chr>
#>  1 4ceb1a79 843f3357
#>  2 4ceb1a79 5fdeb201
#>  3 4ceb1a79 1c9f2fab
#>  4 4ceb1a79 1870fd10
#>  5 4ceb1a79 8111a974
#>  6 4ceb1a79 8817bcb8
#>  7 4ceb1a79 23f22fa4
#>  8 4ceb1a79 94b4c37c
#>  9 4ceb1a79 2d3cfd7d
#> 10 4ceb1a79 53c92a50
#> # ... with 2,519 more rows
#> 
#> attr(,"class")
#> [1] "PATH" "sc"  
#> attr(,"join_ramp")
#> [1] "object"           "path"             "path_link_vertex"
#> [4] "vertex"

Prove that things work by round-tripping to the PATH model and onto the old fortify approach for ggplot2.

inner_cascade <- function(x) {
  tabnames <- silicate:::join_ramp(x)
  tab <- x[[tabnames[1]]]
  for (ni in tabnames[-1L]) tab <- dplyr::inner_join(tab, x[[ni]])
  tab
}

## this just joins everything back together in one big fortify table
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
tab <- bmodel  %>% inner_cascade()
#> Joining, by = "object"
#> Joining, by = "path"
#> Joining, by = "vertex_"

library(ggplot2)
ggplot(tab) + aes(x = x_, y = y_, group = path) + 
  geom_polygon(aes(fill = AREA)) +  geom_path(lwd = 2, col = "black") 

What about polygons with holes and lots of tiny complicated parts.

data("inlandwaters")

iw <- PATH(inlandwaters)

str(iw)
#> List of 4
#>  $ object          :Classes 'tbl_df', 'tbl' and 'data.frame':    6 obs. of  3 variables:
#>   ..$ ID      : int [1:6] 103841 103842 103843 103846 103847 103848
#>   ..$ Province: chr [1:6] "Australian Capital Territory" "New Caledonia" "New South Wales" "South Australia" ...
#>   ..$ object  : chr [1:6] "6231a2b3" "0748e369" "4cfb8987" "57130e22" ...
#>   ..- attr(*, "sf_column")= chr "geom"
#>   ..- attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA
#>   .. ..- attr(*, "names")= chr [1:2] "ID" "Province"
#>  $ path            :Classes 'tbl_df', 'tbl' and 'data.frame':    189 obs. of  6 variables:
#>   ..$ ncol     : int [1:189] 2 2 2 2 2 2 2 2 2 2 ...
#>   ..$ type     : chr [1:189] "MULTIPOLYGON" "MULTIPOLYGON" "MULTIPOLYGON" "MULTIPOLYGON" ...
#>   ..$ subobject: int [1:189] 1 1 1 1 1 1 1 1 1 1 ...
#>   ..$ object   : chr [1:189] "6231a2b3" "0748e369" "4cfb8987" "4cfb8987" ...
#>   ..$ path     : chr [1:189] "e3d22cca" "062fe863" "9811579f" "4a8545d7" ...
#>   ..$ ncoords_ : int [1:189] 280 27 7310 68 280 88 162 119 51 71 ...
#>  $ vertex          :Classes 'tbl_df', 'tbl' and 'data.frame':    30835 obs. of  3 variables:
#>   ..$ x_     : num [1:30835] 1116371 1117093 1117172 1117741 1117629 ...
#>   ..$ y_     : num [1:30835] -458419 -457111 -456893 -456561 -455510 ...
#>   ..$ vertex_: chr [1:30835] "be17eacc" "f45a7f3b" "61b69e20" "87cc6dc7" ...
#>  $ path_link_vertex:Classes 'tbl_df', 'tbl' and 'data.frame':    33644 obs. of  2 variables:
#>   ..$ path   : chr [1:33644] "e3d22cca" "e3d22cca" "e3d22cca" "e3d22cca" ...
#>   ..$ vertex_: chr [1:33644] "be17eacc" "f45a7f3b" "61b69e20" "87cc6dc7" ...
#>  - attr(*, "class")= chr [1:2] "PATH" "sc"
#>  - attr(*, "join_ramp")= chr [1:4] "object" "path" "path_link_vertex" "vertex"

tab <- iw  %>% inner_cascade()
#> Joining, by = "object"
#> Joining, by = "path"
#> Joining, by = "vertex_"

library(ggplot2)
ggplot(tab) + aes(x = x_, y = y_, group = path) + 
  ggpolypath::geom_polypath(aes(fill = Province)) +  geom_path(col = "black") 

ggplot(tab %>% filter(Province == "South Australia")) + aes(x = x_, y = y_, group = path) + 
  ggpolypath::geom_polypath(fill = "dodgerblue") +  geom_path(col = "black") + coord_fixed()

Releases

No releases published

Packages

No packages published

Languages