diff --git a/.cross_rubies b/.cross_rubies index 234b75d57f..35488a2a5e 100644 --- a/.cross_rubies +++ b/.cross_rubies @@ -1,24 +1,28 @@ -3.0.0:i686-w64-mingw32 -3.0.0:x86_64-w64-mingw32 -3.0.0:i686-linux-gnu -3.0.0:x86_64-linux-gnu -3.0.0:x86_64-darwin -3.0.0:arm64-darwin -2.7.0:i686-w64-mingw32 -2.7.0:x86_64-w64-mingw32 -2.7.0:i686-linux-gnu -2.7.0:x86_64-linux-gnu -2.7.0:x86_64-darwin -2.7.0:arm64-darwin -2.6.0:i686-w64-mingw32 -2.6.0:x86_64-w64-mingw32 -2.6.0:i686-linux-gnu -2.6.0:x86_64-linux-gnu -2.6.0:x86_64-darwin +2.6.0:aarch64-linux 2.6.0:arm64-darwin -2.5.0:i686-w64-mingw32 -2.5.0:x86_64-w64-mingw32 -2.5.0:i686-linux-gnu -2.5.0:x86_64-linux-gnu -2.5.0:x86_64-darwin -2.5.0:arm64-darwin +2.6.0:x64-mingw32 +2.6.0:x86-linux +2.6.0:x86-mingw32 +2.6.0:x86_64-darwin +2.6.0:x86_64-linux +2.7.0:aarch64-linux +2.7.0:arm64-darwin +2.7.0:x64-mingw32 +2.7.0:x86-linux +2.7.0:x86-mingw32 +2.7.0:x86_64-darwin +2.7.0:x86_64-linux +3.0.0:aarch64-linux +3.0.0:arm64-darwin +3.0.0:x64-mingw32 +3.0.0:x86-linux +3.0.0:x86-mingw32 +3.0.0:x86_64-darwin +3.0.0:x86_64-linux +3.1.0:aarch64-linux +3.1.0:arm64-darwin +3.1.0:x64-mingw-ucrt +3.1.0:x86-linux +3.1.0:x86-mingw32 +3.1.0:x86_64-darwin +3.1.0:x86_64-linux diff --git a/.editorconfig b/.editorconfig index e5dd4f83fd..fb6e656d50 100644 --- a/.editorconfig +++ b/.editorconfig @@ -14,4 +14,4 @@ indent_size = 2 [**.java] indent_style = space -indent_size = 4 +indent_size = 2 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000000..50b05b092e --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,320 @@ +name: ci +concurrency: + group: "${{github.workflow}}-${{github.ref}}" + cancel-in-progress: true +on: + workflow_dispatch: + schedule: + - cron: "0 8 * * 3" # At 08:00 on Wednesday # https://crontab.guru/#0_8_*_*_3 + push: + branches: + - main + - v*.*.x + tags: + - v*.*.* + pull_request: + types: [opened, synchronize] + branches: + - '*' + +jobs: + rubocop: + runs-on: ubuntu-latest 
+ container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-3.1 + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - run: bundle install --local || bundle install + - run: bundle exec rake rubocop + + gumbo: + needs: ["rubocop"] + strategy: + fail-fast: false + matrix: + plat: ["ubuntu", "windows", "macos"] + runs-on: ${{matrix.plat}}-latest + steps: + - name: configure git crlf + if: matrix.plat == 'windows' + run: | + git config --system core.autocrlf false + git config --system core.eol lf + - uses: actions/checkout@v2 + with: + submodules: true + - uses: MSP-Greg/setup-ruby-pkgs@v1 + with: + ruby-version: "3.0" # OK for now, until ruby 3.1 CI is stable + apt-get: "ragel" + brew: "ragel" + mingw: "ragel" + bundler-cache: true + - run: bundle exec rake gumbo:test + + basic: + needs: ["rubocop"] + strategy: + fail-fast: false + matrix: + image: ["ubuntu", "ubuntu32"] + sys: ["enable"] + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:${{matrix.image}} + steps: + - uses: actions/checkout@v1 # v1 because of https://github.com/actions/checkout/issues/334 + with: + submodules: true + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test + + linux: + needs: ["basic"] + strategy: + fail-fast: false + matrix: + sys: ["enable", "disable"] + ruby: ["2.6", "2.7", "3.0", "3.1"] + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-${{matrix.ruby}} + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: actions/cache@v2 + if: matrix.sys == 'disable' + with: + path: ports + key: ports-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test + + valgrind: + needs: ["linux"] + strategy: + fail-fast: false + matrix: + sys: 
["enable", "disable"] + ruby: ["2.6", "2.7", "3.0", "3.1"] + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-${{matrix.ruby}} + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: actions/cache@v2 + if: matrix.sys == 'disable' + with: + path: ports + key: ports-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test:valgrind + + musl: + needs: ["basic"] + strategy: + fail-fast: false + matrix: + sys: ["enable"] + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:alpine + steps: + - uses: actions/checkout@v1 # v1 because of https://github.com/actions/checkout/issues/334 + with: + submodules: true + # skip cache because of https://github.com/actions/cache/issues/675 + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test + + musl-valgrind: + needs: ["musl"] + strategy: + fail-fast: false + matrix: + sys: ["disable"] + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:alpine + steps: + - uses: actions/checkout@v1 # v1 because of https://github.com/actions/checkout/issues/334 + with: + submodules: true + # skip cache because of https://github.com/actions/cache/issues/675 + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test:valgrind + + libxmlruby: + needs: ["basic"] + strategy: + fail-fast: false + matrix: + sys: ["enable"] + ruby: ["3.1"] + env: + BUNDLE_GEMFILE: "Gemfile-libxml-ruby" + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-${{matrix.ruby}} + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: actions/cache@v2 + if: matrix.sys == 
'disable' + with: + path: ports + key: ports-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test + + libxmlruby-valgrind: + needs: ["libxmlruby"] + strategy: + fail-fast: false + matrix: + sys: ["disable"] + ruby: ["3.1"] + env: + BUNDLE_GEMFILE: "Gemfile-libxml-ruby" + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-${{matrix.ruby}} + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: actions/cache@v2 + if: matrix.sys == 'disable' + with: + path: ports + key: ports-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test:valgrind + + osx: + needs: ["basic"] + strategy: + fail-fast: false + matrix: + sys: ["enable", "disable"] + ruby: ["2.6", "2.7", "3.0", "3.1"] + runs-on: macos-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{matrix.ruby}} + bundler-cache: true + - uses: actions/cache@v2 + if: matrix.sys == 'disable' + with: + path: ports + key: ports-macos-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test + + windows: + needs: ["basic"] + strategy: + fail-fast: false + matrix: + sys: ["enable", "disable"] + ruby: ["2.6", "2.7", "3.0", "3.1", "mingw"] + runs-on: windows-2022 + steps: + - name: configure git crlf + run: | + git config --system core.autocrlf false + git config --system core.eol lf + - uses: actions/checkout@v2 + with: + submodules: true + - uses: MSP-Greg/setup-ruby-pkgs@v1 + with: + ruby-version: "${{matrix.ruby}}" + mingw: "libxml2 libxslt" + bundler-cache: true + - uses: 
actions/cache@v2 + if: matrix.sys == 'disable' + with: + path: ports + key: ports-windows-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test + + jruby: + needs: ["basic"] + strategy: + fail-fast: false + matrix: + ruby: ["jruby-9.3"] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{matrix.ruby}} + bundler-cache: true + - run: bundle exec rake compile + - run: bundle exec rake test + + bsd: + continue-on-error: true # we're seeing VMs hang and fail the whole workflow + needs: ["basic"] + strategy: + fail-fast: false + matrix: + sys: ["enable", "disable"] + runs-on: macos-12 + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: vmactions/freebsd-vm@v0.2.0 + with: + usesh: true + prepare: pkg install -y ruby devel/ruby-gems pkgconf libxml2 libxslt + run: | + gem install bundler + bundle install --local || bundle install + bundle exec rake compile -- --${{matrix.sys}}-system-libraries + bundle exec rake test + + memcheck: + strategy: + fail-fast: false + matrix: + sys: ["disable"] + ruby: ["3.1"] + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-${{matrix.ruby}} + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: actions/cache@v2 + if: matrix.sys == 'disable' + with: + path: ports + key: ports-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test:memcheck diff --git a/.github/workflows/downstream.yml b/.github/workflows/downstream.yml new file mode 100644 index 0000000000..7278f3f546 --- /dev/null +++ b/.github/workflows/downstream.yml @@ -0,0 +1,73 @@ +name: downstream +concurrency: + group: 
"${{github.workflow}}-${{github.ref}}" + cancel-in-progress: true +on: + workflow_dispatch: + schedule: + - cron: "0 8 * * 1,3,5" # At 08:00 on Monday, Wednesday, and Friday # https://crontab.guru/#0_8_*_*_1,3,5 + push: + branches: + - main + - v*.*.x + tags: + - v*.*.* + pull_request: + types: [opened, synchronize] + branches: + - '*' + +jobs: + downstream: + name: downstream-${{matrix.name}} + strategy: + fail-fast: false + matrix: + include: + - url: https://github.com/flavorjones/loofah + name: loofah + command: "bundle exec rake test" + - url: https://github.com/rails/rails-html-sanitizer + name: rails-html-sanitizer + command: "bundle exec rake test" + - url: https://github.com/rgrove/sanitize + name: sanitize + command: "bundle exec rake test" + - url: https://github.com/ebeigarts/signer + name: signer + command: "bundle exec rake spec" + - url: https://github.com/WinRb/Viewpoint + name: viewpoint + command: "bundle exec rspec spec" + - url: https://github.com/rails/rails + name: xmlmini + command: "cd activesupport && bundle exec rake test TESTOPTS=-n/XmlMini/" + - url: https://github.com/pythonicrubyist/creek + name: creek + command: "bundle exec rake spec" + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-3.1 + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: actions/cache@v2 + with: + path: ports + key: ports-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle install --local || bundle install + - run: bundle exec rake compile + - run: | + git clone --depth=1 ${{matrix.url}} ${{matrix.name}} + cd ${{matrix.name}} + if grep nokogiri Gemfile ; then + sed -i 's/\(.*nokogiri.*\)/\1, path: ".."/' Gemfile + else + echo "gem 'nokogiri', path: '..'" >> Gemfile + fi + if egrep "add_development_dependency.*\bbundler\b" *gemspec ; then + sed -i 's/.*add_development_dependency.*\bbundler\b.*//' *gemspec + fi + bundle install --local || bundle install + ${{matrix.command}} diff 
--git a/.github/workflows/gem-install.yml b/.github/workflows/gem-install.yml new file mode 100644 index 0000000000..16440f71a3 --- /dev/null +++ b/.github/workflows/gem-install.yml @@ -0,0 +1,337 @@ +name: gem-install +concurrency: + group: "${{github.workflow}}-${{github.ref}}" + cancel-in-progress: true +on: + workflow_dispatch: + push: + branches: + - main + - v*.*.x + tags: + - v*.*.* + pull_request: + types: [opened, synchronize] + branches: + - '*' + +jobs: + cruby-package: + name: "cruby-package" + runs-on: ubuntu-latest + container: + image: "larskanis/rake-compiler-dock-mri-x86_64-linux:1.2.2" + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: actions/cache@v2 + with: + path: ports/archives + key: tarballs-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: ./scripts/test-gem-build gems ruby + - uses: actions/upload-artifact@v2 + with: + name: cruby-gem + path: gems + retention-days: 1 + + cruby-linux-install: + needs: ["cruby-package"] + strategy: + fail-fast: false + matrix: + sys: ["enable", "disable"] + ruby: ["2.6", "2.7", "3.0", "3.1", "head"] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: MSP-Greg/setup-ruby-pkgs@v1 + with: + ruby-version: "${{matrix.ruby}}" + apt-get: "libxml2-dev libxslt1-dev pkg-config" + - uses: actions/download-artifact@v2 + with: + name: cruby-gem + path: gems + - run: ./scripts/test-gem-install gems --${{matrix.sys}}-system-libraries + + cruby-osx-install: + needs: ["cruby-package"] + strategy: + fail-fast: false + matrix: + sys: ["enable", "disable"] + runs-on: macos-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: "3.1" + - uses: actions/download-artifact@v2 + with: + name: cruby-gem + path: gems + - run: ./scripts/test-gem-install gems --${{matrix.sys}}-system-libraries + + cruby-windows-install: + needs: ["cruby-package"] + strategy: + fail-fast: 
false + matrix: + sys: ["enable", "disable"] + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: MSP-Greg/setup-ruby-pkgs@v1 + with: + ruby-version: "3.0" + mingw: "libxml2 libxslt" + - uses: actions/download-artifact@v2 + with: + name: cruby-gem + path: gems + - run: | + gem install --verbose --no-document gems/*.gem -- --${{matrix.sys}}-system-libraries + gem list -d nokogiri + nokogiri -v + + cruby-windows-install-ucrt: + needs: ["cruby-package"] + strategy: + fail-fast: false + matrix: + sys: ["enable", "disable"] + runs-on: windows-2022 + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: MSP-Greg/setup-ruby-pkgs@v1 + with: + ruby-version: "3.1" + mingw: "libxml2 libxslt" + - uses: actions/download-artifact@v2 + with: + name: cruby-gem + path: gems + - run: | + gem install --verbose --no-document gems/*.gem -- --${{matrix.sys}}-system-libraries + gem list -d nokogiri + nokogiri -v + + cruby-native-package: + name: "cruby-native-package" + strategy: + fail-fast: false + matrix: + plat: + - "aarch64-linux" + # - "arm64-darwin" # github actions does not support this as of 2022-01 + - "x64-mingw-ucrt" + - "x64-mingw32" + - "x86-linux" + # - "x86-mingw32" # github actions does not support this as of 2022-01 + - "x86_64-darwin" + - "x86_64-linux" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: actions/cache@v2 + with: + path: ports/archives + key: tarballs-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: | + docker run --rm -v "$(pwd):/nokogiri" -w /nokogiri \ + larskanis/rake-compiler-dock-mri-${{matrix.plat}}:1.2.2 \ + ./scripts/test-gem-build gems ${{matrix.plat}} + - uses: actions/upload-artifact@v2 + with: + name: "cruby-${{matrix.plat}}-gem" + path: gems + retention-days: 1 + + cruby-x86-linux-install: + needs: ["cruby-native-package"] + strategy: + fail-fast: false + matrix: + ruby: ["2.6", "2.7", "3.0", "3.1"] 
+ runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: actions/download-artifact@v2 + with: + name: cruby-x86-linux-gem + path: gems + - run: | + docker run --rm --privileged multiarch/qemu-user-static --reset -p yes + docker run --rm -v "$(pwd):/nokogiri" -w /nokogiri \ + --platform=linux/386 \ + ruby:${{matrix.ruby}} \ + ./scripts/test-gem-install gems + + cruby-aarch64-linux-install: + needs: ["cruby-native-package"] + strategy: + fail-fast: false + matrix: + ruby: ["2.6", "2.7", "3.0", "3.1"] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: actions/download-artifact@v2 + with: + name: cruby-aarch64-linux-gem + path: gems + - run: | + docker run --rm --privileged multiarch/qemu-user-static --reset -p yes + docker run --rm -v "$(pwd):/nokogiri" -w /nokogiri \ + --platform=linux/arm64/v8 \ + ruby:${{matrix.ruby}} \ + ./scripts/test-gem-install gems + + cruby-x86_64-linux-install: + needs: ["cruby-native-package"] + strategy: + fail-fast: false + matrix: + ruby: ["2.6", "2.7", "3.0", "3.1"] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: "${{matrix.ruby}}" + - uses: actions/download-artifact@v2 + with: + name: cruby-x86_64-linux-gem + path: gems + - run: ./scripts/test-gem-install gems + + cruby-x86_64-musl-install: + needs: ["cruby-native-package"] + strategy: + fail-fast: false + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:alpine + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: actions/download-artifact@v2 + with: + name: cruby-x86_64-linux-gem + path: gems + - run: ./scripts/test-gem-install gems + + cruby-x86_64-darwin-install: + needs: ["cruby-native-package"] + strategy: + fail-fast: false + matrix: + ruby: ["2.6", "2.7", "3.0", "3.1"] + runs-on: macos-latest + steps: + - uses: actions/checkout@v2 + with: 
+ submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: "${{matrix.ruby}}" + - uses: actions/download-artifact@v2 + with: + name: cruby-x86_64-darwin-gem + path: gems + - run: ./scripts/test-gem-install gems + + cruby-x64-mingw32-install: + needs: ["cruby-native-package"] + strategy: + fail-fast: false + matrix: + ruby: ["2.6", "2.7", "3.0"] + runs-on: windows-latest + steps: + - uses: MSP-Greg/setup-ruby-pkgs@v1 + with: + ruby-version: "${{matrix.ruby}}" + - uses: actions/download-artifact@v2 + with: + name: cruby-x64-mingw32-gem + path: gems + - run: | + gem install --verbose --no-document gems/*.gem + gem list -d nokogiri + nokogiri -v + + cruby-x64-mingw-ucrt-install: + needs: ["cruby-native-package"] + strategy: + fail-fast: false + matrix: + ruby: ["3.1"] + runs-on: windows-2022 + steps: + - uses: MSP-Greg/setup-ruby-pkgs@v1 + with: + ruby-version: "${{matrix.ruby}}" + - uses: actions/download-artifact@v2 + with: + name: cruby-x64-mingw-ucrt-gem + path: gems + - run: | + gem install --verbose --no-document gems/*.gem + gem list -d nokogiri + nokogiri -v + + jruby-package: + name: "jruby-package" + runs-on: ubuntu-latest + container: + image: "larskanis/rake-compiler-dock-jruby:1.2.2" + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - run: ./scripts/test-gem-build gems java + - uses: actions/upload-artifact@v2 + with: + name: jruby-gem + path: gems + retention-days: 1 + + jruby-install: + needs: ["jruby-package"] + strategy: + fail-fast: false + matrix: + ruby: ["jruby-9.3", "jruby-head"] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: "${{matrix.ruby}}" + - uses: actions/download-artifact@v2 + with: + name: jruby-gem + path: gems + - run: ./scripts/test-gem-install gems diff --git a/.github/workflows/generate-ci-images.yml b/.github/workflows/generate-ci-images.yml new file mode 100644 index 0000000000..2c156c6c57 --- /dev/null 
+++ b/.github/workflows/generate-ci-images.yml @@ -0,0 +1,81 @@ +# DO NOT EDIT +# this file is automatically generated by the "docker:pipeline" rake task +name: Generate CI Images +on: + workflow_dispatch: {} + schedule: + - cron: "0 5 * * 3" # At 05:00 on Wednesday # https://crontab.guru/#0_5_*_*_3 +# reference: https://github.com/marketplace/actions/build-and-push-docker-images +jobs: + build_images: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: "3.1" + bundler-cache: true + - uses: docker/setup-buildx-action@v1 + - uses: docker/login-action@v1 + with: + registry: ghcr.io + username: ${{github.actor}} + password: ${{secrets.GITHUB_TOKEN}} + - name: alpine + uses: docker/build-push-action@v2 + with: + context: "." + push: true + tags: ghcr.io/sparklemotion/nokogiri-test:alpine + file: oci-images/nokogiri-test/alpine.dockerfile + - name: mri-2.6 + uses: docker/build-push-action@v2 + with: + context: "." + push: true + tags: ghcr.io/sparklemotion/nokogiri-test:mri-2.6 + file: oci-images/nokogiri-test/mri-2.6.dockerfile + - name: mri-2.7 + uses: docker/build-push-action@v2 + with: + context: "." + push: true + tags: ghcr.io/sparklemotion/nokogiri-test:mri-2.7 + file: oci-images/nokogiri-test/mri-2.7.dockerfile + - name: mri-3.0 + uses: docker/build-push-action@v2 + with: + context: "." + push: true + tags: ghcr.io/sparklemotion/nokogiri-test:mri-3.0 + file: oci-images/nokogiri-test/mri-3.0.dockerfile + - name: mri-3.1 + uses: docker/build-push-action@v2 + with: + context: "." + push: true + tags: ghcr.io/sparklemotion/nokogiri-test:mri-3.1 + file: oci-images/nokogiri-test/mri-3.1.dockerfile + - name: truffle-nightly + uses: docker/build-push-action@v2 + with: + context: "." 
+ push: true + tags: ghcr.io/sparklemotion/nokogiri-test:truffle-nightly + file: oci-images/nokogiri-test/truffle-nightly.dockerfile + - name: ubuntu + uses: docker/build-push-action@v2 + with: + context: "." + push: true + tags: ghcr.io/sparklemotion/nokogiri-test:ubuntu + file: oci-images/nokogiri-test/ubuntu.dockerfile + - name: ubuntu32 + uses: docker/build-push-action@v2 + with: + context: "." + push: true + tags: ghcr.io/sparklemotion/nokogiri-test:ubuntu32 + file: oci-images/nokogiri-test/ubuntu32.dockerfile diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml deleted file mode 100644 index 6d18c8cdd7..0000000000 --- a/.github/workflows/macos.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: macos -on: - push: - branches: - - main - pull_request: - types: [opened, synchronize] - branches: - - '*' - -jobs: - cruby-test-system-libraries: - runs-on: macos-latest - strategy: - matrix: - ruby: ["2.7", "3.0"] - steps: - - uses: actions/checkout@v2 - - name: Set ENV - run: | - echo "MAKEFLAGS=-j$((1 + $(sysctl -n hw.activecpu)))" >> $GITHUB_ENV - echo "NOKOGIRI_USE_SYSTEM_LIBRARIES=t" >> $GITHUB_ENV - - uses: ruby/setup-ruby@v1 - with: - bundler-cache: true - ruby-version: ${{ matrix.ruby }} - - run: bundle exec rake compile - - run: bundle exec rake test - - cruby-test-vendored-libraries: - runs-on: macos-latest - strategy: - matrix: - ruby: ["2.7", "3.0"] - steps: - - uses: actions/checkout@v2 - - uses: actions/cache@v2 - with: - path: ports/archives - key: tarballs-${{ hashFiles('**/dependencies.yml') }} - restore-keys: | - tarballs- - - name: Set ENV - run: | - echo "MAKEFLAGS=-j$((1 + $(sysctl -n hw.activecpu)))" >> $GITHUB_ENV - - uses: ruby/setup-ruby@v1 - with: - bundler-cache: true - ruby-version: ${{ matrix.ruby }} - - run: bundle exec rake compile - - run: bundle exec rake test diff --git a/.github/workflows/truffle.yml b/.github/workflows/truffle.yml new file mode 100644 index 0000000000..cd15d06558 --- /dev/null +++ 
b/.github/workflows/truffle.yml @@ -0,0 +1,33 @@ +name: truffle +concurrency: + group: "${{github.workflow}}-${{github.ref}}" + cancel-in-progress: true +on: + workflow_dispatch: + schedule: + - cron: "0 8 * * 1,3,5" # At 08:00 on Monday, Wednesday, and Friday # https://crontab.guru/#0_8_*_*_1,3,5 + +jobs: + truffleruby-head: + strategy: + fail-fast: false + matrix: + flags: + - "--disable-system-libraries --disable-static" + - "--disable-system-libraries --enable-static" + - "--enable-system-libraries" + continue-on-error: true + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:truffle-nightly + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: actions/cache@v2 + with: + path: ports/archives + key: tarballs-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle install --local || bundle install + - run: bundle exec rake compile -- ${{matrix.flags}} + - run: bundle exec rake test diff --git a/.github/workflows/upstream.yml b/.github/workflows/upstream.yml new file mode 100644 index 0000000000..b7cf6f45ff --- /dev/null +++ b/.github/workflows/upstream.yml @@ -0,0 +1,147 @@ +name: upstream +concurrency: + group: "${{github.workflow}}-${{github.ref}}" + cancel-in-progress: true +on: + workflow_dispatch: + schedule: + - cron: "0 8 * * 1,3,5" # At 08:00 on Monday, Wednesday, and Friday # https://crontab.guru/#0_8_*_*_1,3,5 + +jobs: + xmlsoft-head: + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-3.1 + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - name: Setup libxml2 + run: | + git clone --depth=1 https://gitlab.gnome.org/GNOME/libxml2 + cd libxml2 + env NOCONFIGURE=t ./autogen.sh + - name: Setup libxslt + run: | + git clone --depth=1 https://gitlab.gnome.org/GNOME/libxslt + cd libxslt + env NOCONFIGURE=t ./autogen.sh + - name: "Run bundle install" + run: "bundle install --local || bundle install" + - name: "Compile against libxml2 
and libxslt source directories" + run: "bundle exec rake compile -- --with-xml2-source-dir=${GITHUB_WORKSPACE}/libxml2 --with-xslt-source-dir=${GITHUB_WORKSPACE}/libxslt" + - run: "bundle exec rake test" + + xmlsoft-head-valgrind: + needs: ["xmlsoft-head"] + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-3.1 + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - name: Setup libxml2 + run: | + git clone --depth=1 https://gitlab.gnome.org/GNOME/libxml2 + cd libxml2 + env NOCONFIGURE=t ./autogen.sh + - name: Setup libxslt + run: | + git clone --depth=1 https://gitlab.gnome.org/GNOME/libxslt + cd libxslt + env NOCONFIGURE=t ./autogen.sh + - name: "Run bundle install" + run: "bundle install --local || bundle install" + - name: "Compile against libxml2 and libxslt source directories" + run: "bundle exec rake compile -- --with-xml2-source-dir=${GITHUB_WORKSPACE}/libxml2 --with-xslt-source-dir=${GITHUB_WORKSPACE}/libxslt" + - run: "bundle exec rake test:valgrind" + + ruby-head: + strategy: + fail-fast: false + matrix: + plat: ["ubuntu", "windows", "macos"] + sys: ["enable", "disable"] + runs-on: ${{matrix.plat}}-latest + steps: + - name: configure git crlf + if: matrix.plat == 'windows' + run: | + git config --system core.autocrlf false + git config --system core.eol lf + - uses: actions/checkout@v2 + with: + submodules: true + - uses: MSP-Greg/setup-ruby-pkgs@v1 + with: + ruby-version: "head" + apt-get: "libxml2-dev libxslt1-dev pkg-config" + mingw: "_upgrade_ libxml2 libxslt pkgconf" + bundler-cache: true + - uses: actions/cache@v2 + if: matrix.sys == 'disable' + with: + path: ports + key: ports-${{matrix.plat}}-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test + + ruby-head-valgrind: + needs: ["ruby-head"] + strategy: + fail-fast: false + matrix: + sys: ["enable", "disable"] + runs-on: ubuntu-latest + steps: + 
- uses: actions/checkout@v2 + with: + submodules: true + - uses: MSP-Greg/setup-ruby-pkgs@v1 + with: + ruby-version: "head" + apt-get: "libxml2-dev libxslt1-dev pkg-config valgrind" + bundler-cache: true + - uses: actions/cache@v2 + if: matrix.sys == 'disable' + with: + path: ports + key: ports-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle exec rake compile -- --${{matrix.sys}}-system-libraries + - run: bundle exec rake test:valgrind + + jruby-head: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: ruby/setup-ruby@v1 + with: + ruby-version: "jruby-head" + bundler-cache: true + - run: bundle exec rake compile + - run: bundle exec rake test + + # html5lib-tests: + # runs-on: ubuntu-latest + # container: + # image: ghcr.io/sparklemotion/nokogiri-test:mri-3.1 + # steps: + # - uses: actions/checkout@v2 + # with: + # submodules: true + # - uses: actions/cache@v2 + # with: + # path: ports + # key: ports-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + # - name: Update html5lib-tests + # run: | + # cd test/html5lib-tests + # git remote update origin + # git checkout origin/master + # git log --pretty=oneline -n1 + # - run: bundle install --local || bundle install + # - run: bundle exec rake compile -- --disable-system-libraries + # - run: bundle exec rake test diff --git a/.gitignore b/.gitignore index 79baf6a716..82f10832d2 100644 --- a/.gitignore +++ b/.gitignore @@ -9,22 +9,18 @@ /gems/ /lib/nokogiri/**/nokogiri.bundle /lib/nokogiri/**/nokogiri.so +/lib/nokogumbo/**/nokogumbo.bundle +/lib/nokogumbo/**/nokogumbo.so /lib/nokogiri/nokogiri.jar /pkg/ /ports/ /tmp/ -# CI -/concourse/docker-compose.yml -/concourse/private.yml -/concourse/images/*.generated - # code coverage /coverage/ # documentation -/.yardoc/ -/doc/ +/html/ # editors and tags .classpath @@ -34,11 +30,15 @@ build TAGS tags +# auto-formatting +/ext/**/*.orig + # mac .DS_Store/ # Vagrant /.vagrant/ -# 
directories named after issues -/[0-9][0-9][0-9]*/ +# directories or files named after issues, and debugging +/[0-9][0-9][0-9]* +/*.log diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000..57c9b57f2f --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "test/html5lib-tests"] + path = test/html5lib-tests + url = https://github.com/html5lib/html5lib-tests.git + branch = master diff --git a/.rubocop.yml b/.rubocop.yml new file mode 100644 index 0000000000..b1a51ce3f5 --- /dev/null +++ b/.rubocop.yml @@ -0,0 +1,19 @@ +require: + - rubocop-minitest + - rubocop-performance + - rubocop-rake +inherit_gem: + rubocop-shopify: rubocop.yml +inherit_from: .rubocop_todo.yml + +AllCops: + NewCops: enable + Exclude: + - 'lib/nokogiri/css/parser.rb' + - 'lib/nokogiri/css/tokenizer.rb' +Naming/MethodName: + Enabled: false +Naming/FileName: + Exclude: + - rakelib/** + - test/html5/test_tree-construction.rb diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml new file mode 100644 index 0000000000..727df6f532 --- /dev/null +++ b/.rubocop_todo.yml @@ -0,0 +1,109 @@ +# This configuration was generated by +# `rubocop --auto-gen-config` +# on 2022-05-04 15:17:01 UTC using RuboCop version 1.28.2. +# The point is for the user to remove these configuration records +# one by one as the offenses are removed from the code base. +# Note that changes in the inspected code, or installation of new +# versions of RuboCop, may require this file to be generated again. 
+ +# Offense count: 8 +Lint/IneffectiveAccessModifier: + Exclude: + - 'lib/nokogiri/html5.rb' + - 'lib/nokogiri/html5/document.rb' + - 'lib/nokogiri/xml/document.rb' + +# Offense count: 5 +Lint/MissingSuper: + Exclude: + - 'lib/nokogiri/html4/document_fragment.rb' + - 'lib/nokogiri/html5/document_fragment.rb' + - 'lib/nokogiri/xml/document.rb' + - 'lib/nokogiri/xml/document_fragment.rb' + - 'lib/nokogiri/xml/processing_instruction.rb' + +# Offense count: 1 +# This cop supports safe auto-correction (--auto-correct). +Lint/RedundantCopDisableDirective: + Exclude: + - 'lib/nokogiri/xml/processing_instruction.rb' + +# Offense count: 2 +# This cop supports safe auto-correction (--auto-correct). +# Configuration parameters: ContextCreatingMethods, MethodCreatingMethods. +Lint/UselessAccessModifier: + Exclude: + - 'lib/nokogiri/html5.rb' + - 'lib/nokogiri/html5/document.rb' + +# Offense count: 17 +# Configuration parameters: CountBlocks. +Metrics/BlockNesting: + Max: 5 + +# Offense count: 2 +# Configuration parameters: Max, CountKeywordArgs. +Metrics/ParameterLists: + MaxOptionalParameters: 4 + +# Offense count: 4 +# Configuration parameters: MinSize. +Performance/CollectionLiteralInLoop: + Exclude: + - 'lib/nokogiri/xml/pp/node.rb' + - 'test/html5/test_tree-construction.rb' + - 'test/xml/test_dtd_encoding.rb' + - 'test/xml/test_node_reparenting.rb' + +# Offense count: 12 +# This cop supports safe auto-correction (--auto-correct). +Performance/StringIdentifierArgument: + Exclude: + - 'lib/nokogiri/html5.rb' + - 'test/html5/test_encoding.rb' + - 'test/test_css_cache.rb' + - 'test/test_memory_leak.rb' + - 'test/xml/test_builder.rb' + +# Offense count: 2 +# This cop supports safe auto-correction (--auto-correct). +Performance/TimesMap: + Exclude: + - 'test/html5/test_nokogumbo.rb' + +# Offense count: 6 +# This cop supports safe auto-correction (--auto-correct). +# Configuration parameters: EnforcedStyle. 
+# SupportedStyles: nested, compact +Style/ClassAndModuleChildren: + Exclude: + - 'test/html4/test_html_module.rb' + - 'test/test_nokogiri.rb' + - 'test/test_xslt_transforms.rb' + - 'test/xml/node/test_attribute_methods.rb' + - 'test/xml/sax/test_parser.rb' + - 'test/xml/sax/test_push_parser.rb' + +# Offense count: 17 +# This cop supports safe auto-correction (--auto-correct). +# Configuration parameters: AllowedVars. +Style/FetchEnvVar: + Exclude: + - 'ext/nokogiri/extconf.rb' + - 'rakelib/debug.rake' + - 'rakelib/extensions.rake' + - 'rakelib/rdoc.rake' + - 'test/helper.rb' + +# Offense count: 2 +Style/MissingRespondToMissing: + Exclude: + - 'lib/nokogiri/xml/builder.rb' + +# Offense count: 81 +# This cop supports safe auto-correction (--auto-correct). +# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, IgnoredPatterns. +# URISchemes: http, https +# AllowedPatterns: \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), 
\A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), 
\A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z) +Layout/LineLength: + Max: 250 diff --git a/.yardopts b/.yardopts deleted file mode 100644 index bd2c740114..0000000000 --- a/.yardopts +++ /dev/null @@ -1,7 +0,0 @@ ---embed-mixins ---main=README.md -lib/**/*.rb -ext/nokogiri/*.c -- -README.md 
-LICENSE.md diff --git a/CHANGELOG.md b/CHANGELOG.md index b6b476529b..481f837e64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,392 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA --- +## 1.13.9 / 2022-10-18 + +### Security + +* [CRuby] Vendored libxml2 is updated to address [CVE-2022-2309](https://nvd.nist.gov/vuln/detail/CVE-2022-2309), [CVE-2022-40304](https://nvd.nist.gov/vuln/detail/CVE-2022-40304), and [CVE-2022-40303](https://nvd.nist.gov/vuln/detail/CVE-2022-40303). See [GHSA-2qc6-mcvw-92cw](https://github.com/sparklemotion/nokogiri/security/advisories/GHSA-2qc6-mcvw-92cw) for more information. +* [CRuby] Vendored zlib is updated to address [CVE-2022-37434](https://ubuntu.com/security/CVE-2022-37434). Nokogiri was not affected by this vulnerability, but this version of zlib was being flagged up by some vulnerability scanners, see [#2626](https://github.com/sparklemotion/nokogiri/issues/2626) for more information. + + +### Dependencies + +* [CRuby] Vendored libxml2 is updated to [v2.10.3](https://gitlab.gnome.org/GNOME/libxml2/-/releases/v2.10.3) from v2.9.14. +* [CRuby] Vendored libxslt is updated to [v1.1.37](https://gitlab.gnome.org/GNOME/libxslt/-/releases/v1.1.37) from v1.1.35. +* [CRuby] Vendored zlib is updated from 1.2.12 to 1.2.13. (See [LICENSE-DEPENDENCIES.md](https://github.com/sparklemotion/nokogiri/blob/v1.13.x/LICENSE-DEPENDENCIES.md#platform-releases) for details on which packages redistribute this library.) + + +### Fixed + +* [CRuby] `Nokogiri::XML::Namespace` objects, when compacted, update their internal struct's reference to the Ruby object wrapper. Previously, with GC compaction enabled, a segmentation fault was possible after compaction was triggered. [[#2658](https://github.com/sparklemotion/nokogiri/issues/2658)] (Thanks, [@eightbitraptor](https://github.com/eightbitraptor) and [@peterzhu2118](https://github.com/peterzhu2118)!) 
+* [CRuby] `Document#remove_namespaces!` now defers freeing the underlying `xmlNs` struct until the `Document` is GCed. Previously, maintaining a reference to a `Namespace` object that was removed in this way could lead to a segfault. [[#2658](https://github.com/sparklemotion/nokogiri/issues/2658)] + + +## 1.13.8 / 2022-07-23 + +### Deprecated + +- `XML::Reader#attribute_nodes` is deprecated due to incompatibility between libxml2's `xmlReader` memory semantics and Ruby's garbage collector. Although this method continues to exist for backwards compatibility, it is unsafe to call and may segfault. This method will be removed in a future version of Nokogiri, and callers should use `#attribute_hash` instead. [[#2598](https://github.com/sparklemotion/nokogiri/issues/2598)] + + +### Improvements + +- `XML::Reader#attribute_hash` is a new method to safely retrieve the attributes of a node from `XML::Reader`. [[#2598](https://github.com/sparklemotion/nokogiri/issues/2598), [#2599](https://github.com/sparklemotion/nokogiri/issues/2599)] + + +### Fixed + +- [CRuby] `XML::Reader#attributes` is now safe to call. In Nokogiri <= 1.13.7 this method may segfault. [[#2598](https://github.com/sparklemotion/nokogiri/issues/2598), [#2599](https://github.com/sparklemotion/nokogiri/issues/2599)] + + +## 1.13.7 / 2022-07-12 + +### Fixed + +`XML::Node` objects, when compacted, update their internal struct's reference to the Ruby object wrapper. Previously, with GC compaction enabled, a segmentation fault was possible after compaction was triggered. [[#2578](https://github.com/sparklemotion/nokogiri/issues/2578)] (Thanks, [@eightbitraptor](https://github.com/eightbitraptor)!) + + +## 1.13.6 / 2022-05-08 + +### Security + +* [CRuby] Address [CVE-2022-29181](https://nvd.nist.gov/vuln/detail/CVE-2022-29181), improper handling of unexpected data types, related to untrusted inputs to the SAX parsers. 
See [GHSA-xh29-r2w5-wx8m](https://github.com/sparklemotion/nokogiri/security/advisories/GHSA-xh29-r2w5-wx8m) for more information. + + +### Improvements + +* `{HTML4,XML}::SAX::{Parser,ParserContext}` constructor methods now raise `TypeError` instead of segfaulting when an incorrect type is passed. + + +## 1.13.5 / 2022-05-04 + +### Security + +* [CRuby] Vendored libxml2 is updated to address [CVE-2022-29824](https://nvd.nist.gov/vuln/detail/CVE-2022-29824). See [GHSA-cgx6-hpwq-fhv5](https://github.com/sparklemotion/nokogiri/security/advisories/GHSA-cgx6-hpwq-fhv5) for more information. + + +### Dependencies + +* [CRuby] Vendored libxml2 is updated from v2.9.13 to [v2.9.14](https://gitlab.gnome.org/GNOME/libxml2/-/releases/v2.9.14). + + +### Improvements + +* [CRuby] The libxml2 HTML parser no longer exhibits quadratic behavior when recovering some broken markup related to start-of-tag and bare `<` characters. + + +### Changed + +* [CRuby] The libxml2 HTML parser in v2.9.14 recovers from some broken markup differently. Notably, the XML CDATA escape sequence ` 2.6.1` to `~> 2.7.0`. ("ruby" platform gem only.) + + +### Improved + +* `{XML,HTML4}::DocumentFragment` constructors all now take an optional parse options parameter or block (similar to Document constructors). [[#1692](https://github.com/sparklemotion/nokogiri/issues/1692)] (Thanks, [@JackMc](https://github.com/JackMc)!) +* `Nokogiri::CSS.xpath_for` allows an `XPathVisitor` to be injected, for finer-grained control over how CSS queries are translated into XPath. +* [CRuby] `XML::Reader#encoding` will return the encoding detected by the parser when it's not passed to the constructor. [[#980](https://github.com/sparklemotion/nokogiri/issues/980)] +* [CRuby] Handle abruptly-closed HTML comments as recommended by WHATWG. (Thanks to [tehryanx](https://hackerone.com/tehryanx?type=user) for reporting!) +* [CRuby] `Node#line` is no longer capped at 65535. 
libxml v2.9.0 and later support a new parse option, exposed as `Nokogiri::XML::ParseOptions::PARSE_BIG_LINES`, which is turned on by default in `ParseOptions::DEFAULT_{XML,XSLT,HTML,SCHEMA}` (Note that JRuby already supported large line numbers.) [[#1764](https://github.com/sparklemotion/nokogiri/issues/1764), [#1493](https://github.com/sparklemotion/nokogiri/issues/1493), [#1617](https://github.com/sparklemotion/nokogiri/issues/1617), [#1505](https://github.com/sparklemotion/nokogiri/issues/1505), [#1003](https://github.com/sparklemotion/nokogiri/issues/1003), [#533](https://github.com/sparklemotion/nokogiri/issues/533)] +* [CRuby] If a cycle is introduced when reparenting a node (i.e., the node becomes its own ancestor), a `RuntimeError` is raised. libxml2 does no checking for this, which means cycles would otherwise result in infinite loops on subsequent operations. (Note that JRuby already did this.) [[#1912](https://github.com/sparklemotion/nokogiri/issues/1912)] +* [CRuby] Source builds will download zlib and libiconv via HTTPS. ("ruby" platform gem only.) [[#2391](https://github.com/sparklemotion/nokogiri/issues/2391)] (Thanks, [@jmartin-r7](https://github.com/jmartin-r7)!) +* [JRuby] `Node#line` behavior has been modified to return the line number of the node in the _final DOM structure_. This behavior is different from CRuby, which returns the node's position in the _input string_. Ideally the two implementations would be the same, but at least this behavior is now officially documented and tested. The real-world impact of this change is that the value returned in JRuby is greater by 1 to account for the XML prolog in the output. [[#2380](https://github.com/sparklemotion/nokogiri/issues/2380)] (Thanks, [@dabdine](https://github.com/dabdine)!) + + +### Fixed + +* CSS queries on HTML5 documents now correctly match foreign elements (SVG, MathML) when namespaces are not specified in the query. 
[[#2376](https://github.com/sparklemotion/nokogiri/issues/2376)] +* `XML::Builder` blocks restore context properly when exceptions are raised. [[#2372](https://github.com/sparklemotion/nokogiri/issues/2372)] (Thanks, [@ric2b](https://github.com/ric2b) and [@rinthedev](https://github.com/rinthedev)!) +* The `Nokogiri::CSS::Parser` cache now uses the `XPathVisitor` configuration as part of the cache key, preventing incorrect cache results from being returned when multiple `XPathVisitor` options are being used. +* Error recovery from in-context parsing (e.g., `Node#parse`) now always uses the correct `DocumentFragment` class. Previously `Nokogiri::HTML4::DocumentFragment` was always used, even for XML documents. [[#1158](https://github.com/sparklemotion/nokogiri/issues/1158)] +* `DocumentFragment#>` now works properly, matching a CSS selector against only the fragment roots. [[#1857](https://github.com/sparklemotion/nokogiri/issues/1857)] +* `XML::DocumentFragment#errors` now correctly contains any parsing errors encountered. Previously this was always empty. (Note that `HTML::DocumentFragment#errors` already did this.) +* [CRuby] Fix memory leak in `Document#canonicalize` when inclusive namespaces are passed in. [[#2345](https://github.com/sparklemotion/nokogiri/issues/2345)] +* [CRuby] Fix memory leak in `Document#canonicalize` when an argument type error is raised. [[#2345](https://github.com/sparklemotion/nokogiri/issues/2345)] +* [CRuby] Fix memory leak in `EncodingHandler` where iconv handlers were not being cleaned up. [[#2345](https://github.com/sparklemotion/nokogiri/issues/2345)] +* [CRuby] Fix memory leak in XPath custom handlers where string arguments were not being cleaned up. [[#2345](https://github.com/sparklemotion/nokogiri/issues/2345)] +* [CRuby] Fix memory leak in `Reader#base_uri` where the string returned by libxml2 was not freed. 
[[#2347](https://github.com/sparklemotion/nokogiri/issues/2347)] +* [JRuby] Deleting a `Namespace` from a `NodeSet` no longer modifies the `href` to be the default namespace URL. +* [JRuby] Fix XHTML formatting of closing tags for non-container elements. [[#2355](https://github.com/sparklemotion/nokogiri/issues/2355)] + + +### Deprecated + +* Passing a `Nokogiri::XML::Node` as the second parameter to `Node.new` is deprecated and will generate a warning. This parameter should be a kind of `Nokogiri::XML::Document`. This will become an error in a future version of Nokogiri. [[#975](https://github.com/sparklemotion/nokogiri/issues/975)] +* `Nokogiri::CSS::Parser`, `Nokogiri::CSS::Tokenizer`, and `Nokogiri::CSS::Node` are now internal-only APIs that are no longer documented, and should not be considered stable. With the introduction of `XPathVisitor` injection into `Nokogiri::CSS.xpath_for` there should be no reason to rely on these internal APIs. +* CSS-to-XPath utility classes `Nokogiri::CSS::XPathVisitorAlwaysUseBuiltins` and `XPathVisitorOptimallyUseBuiltins` are deprecated. Prefer `Nokogiri::CSS::XPathVisitor` with appropriate constructor arguments. These classes will be removed in a future version of Nokogiri. + + +## 1.12.5 / 2021-09-27 + +### Security + +[JRuby] Address CVE-2021-41098 ([GHSA-2rr5-8q37-2w7h](https://github.com/sparklemotion/nokogiri/security/advisories/GHSA-2rr5-8q37-2w7h)). + +In Nokogiri v1.12.4 and earlier, on JRuby only, the SAX parsers resolve external entities (XXE) by default. This fix turns off entity-resolution-by-default in the JRuby SAX parsers to match the CRuby SAX parsers' behavior. + +CRuby users are not affected by this CVE. + + +### Fixed + +* [CRuby] `Document#to_xhtml` properly serializes self-closing tags in libxml > 2.9.10. A behavior change introduced in libxml 2.9.11 resulted in emitting start and end tags (e.g., `<br></br>`) instead of a self-closing tag (e.g., `<br/>
`) in previous Nokogiri versions. [[#2324](https://github.com/sparklemotion/nokogiri/issues/2324)] + + +## 1.12.4 / 2021-08-29 + +### Notable fix: Namespace inheritance + +Namespace behavior when reparenting nodes has historically been poorly specified and the behavior diverged between CRuby and JRuby. As a result, making this behavior consistent in v1.12.0 introduced a breaking change. + +This patch release reverts the Builder behavior present in v1.12.0..v1.12.3 but keeps the Document behavior. This release also introduces a Document attribute to allow affected users to easily change this behavior for their legacy code without invasive changes. + + +#### Compensating Feature in XML::Document + +This release of Nokogiri introduces a new `Document` boolean attribute, `namespace_inheritance`, which controls whether children should inherit a namespace when they are reparented. `Nokogiri::XML::Document` defaults this attribute to `false` meaning "do not inherit," thereby making explicit the behavior change introduced in v1.12.0. + +CRuby users who desire the pre-v1.12.0 behavior may set `document.namespace_inheritance = true` before reparenting nodes. + +See https://nokogiri.org/rdoc/Nokogiri/XML/Document.html#namespace_inheritance-instance_method for example usage. + + +#### Fix for XML::Builder + +However, recognizing that we want `Builder`-created children to inherit namespaces, Builder now will set `namespace_inheritance=true` on the underlying document for both JRuby and CRuby. This means that, on CRuby, the pre-v1.12.0 behavior is restored. + +Users who want to turn this behavior off may pass a keyword argument to the Builder constructor like so: + +``` ruby +Nokogiri::XML::Builder.new(namespace_inheritance: false) +``` + +See https://nokogiri.org/rdoc/Nokogiri/XML/Builder.html#label-Namespace+inheritance for example usage. 
+ + +#### Downstream gem maintainers + +Note that any downstream gems may want to specifically omit Nokogiri v1.12.0--v1.12.3 from their dependency specification if they rely on child namespace inheritance: + +``` ruby +Gem::Specification.new do |gem| + # ... + gem.add_runtime_dependency 'nokogiri', '!=1.12.3', '!=1.12.2', '!=1.12.1', '!=1.12.0' + # ... +end +``` + + +### Fixed + +* [JRuby] Fix NPE in Schema parsing when an imported resource doesn't have a `systemId`. [[#2296](https://github.com/sparklemotion/nokogiri/issues/2296)] (Thanks, [@pepijnve](https://github.com/pepijnve)!) + + +## 1.12.3 / 2021-08-10 + +### Fixed + +* [CRuby] Fix compilation of libgumbo on older systems with versions of GCC that give errors on C99-isms. Affected systems include RHEL6, RHEL7, and SLES12. [[#2302](https://github.com/sparklemotion/nokogiri/issues/2302)] + + +## 1.12.2 / 2021-08-04 + +### Fixed + +* [CRuby] Ensure that C extension files in non-native gem installations are loaded using `require` and rely on `$LOAD_PATH` instead of using `require_relative`. This issue only exists when deleting shared libraries that exist outside the extensions directory, something users occasionally do to conserve disk space. [[#2300](https://github.com/sparklemotion/nokogiri/issues/2300)] + + +## 1.12.1 / 2021-08-03 + +### Fixed + +* [CRuby] Fix compilation of libgumbo on BSD systems by avoiding GNU-isms. [[#2298](https://github.com/sparklemotion/nokogiri/issues/2298)] + + +## 1.12.0 / 2021-08-02 + +### Notable Addition: HTML5 Support (CRuby only) + +__HTML5 support__ has been added (to CRuby only) by merging [Nokogumbo](https://github.com/rubys/nokogumbo) into Nokogiri. The Nokogumbo public API has been preserved, so this functionality is available under the `Nokogiri::HTML5` namespace. [[#2204](https://github.com/sparklemotion/nokogiri/issues/2204)] + +Please note that HTML5 support is not available for JRuby in this version. 
However, we feel it is important to think about JRuby and we hope to work on this in the future. If you're interested in helping with HTML5 support on JRuby, please reach out to the maintainers by commenting on issue [#2227](https://github.com/sparklemotion/nokogiri/issues/2227). + +Many thanks to Sam Ruby, Steve Checkoway, and Craig Barnes for creating and maintaining Nokogumbo and supporting the Gumbo HTML5 parser. They're now Nokogiri core contributors with all the powers and privileges pertaining thereto. 🙌 + + +### Notable Change: `Nokogiri::HTML4` module and namespace + +`Nokogiri::HTML` has been renamed to `Nokogiri::HTML4`, and `Nokogiri::HTML` is aliased to preserve backwards-compatibility. `Nokogiri::HTML` and `Nokogiri::HTML4` parse methods still use libxml2's (or NekoHTML's) HTML4 parser in the v1.12 release series. + +Take special note that if you rely on the class name of an object in your code, objects will now report a class of `Nokogiri::HTML4::Foo` where they previously reported `Nokogiri::HTML::Foo`. Instead of relying on the string returned by `Object#class`, prefer `Class#===` or `Object#is_a?` or `Object#instance_of?`. + +Future releases of Nokogiri may deprecate `HTML` methods or otherwise change this behavior, so please start using `HTML4` in place of `HTML`. + + +### Added + +* [CRuby] `Nokogiri::VERSION_INFO["libxslt"]["datetime_enabled"]` is a new boolean value which describes whether libxslt (or, more properly, libexslt) has compiled-in datetime support. This is generally going to be `true`, but some distros ship without this support (e.g., some mingw UCRT-based packages, see https://github.com/msys2/MINGW-packages/pull/8957). See [#2272](https://github.com/sparklemotion/nokogiri/issues/2272) for more details. + + +### Changed + +* Introduce a new constant, `Nokogiri::XML::ParseOptions::DEFAULT_XSLT`, which adds the libxslt-preferred options of `NOENT | DTDLOAD | DTDATTR | NOCDATA` to `ParseOptions::DEFAULT_XML`. 
+* `Nokogiri.XSLT` parses stylesheets using `ParseOptions::DEFAULT_XSLT`, which should make some edge-case XSL transformations match libxslt's default behavior. [[#1940](https://github.com/sparklemotion/nokogiri/issues/1940)] + + +### Fixed + +* [CRuby] Namespaced attributes are handled properly when their parent node is reparented into another document. Previously, the namespace may have gotten dropped. [[#2228](https://github.com/sparklemotion/nokogiri/issues/2228)] +* [CRuby] Reparented nodes no longer inherit their parent's namespace. Previously, a node without a namespace was forced to adopt its parent's namespace. [[#1712](https://github.com/sparklemotion/nokogiri/issues/1712), [#425](https://github.com/sparklemotion/nokogiri/issues/425)] + + +### Improved + +* [CRuby] Speed up (slightly) the compile time of packaged libraries `libiconv`, `libxml2`, and `libxslt` by using autoconf's `--disable-dependency-tracking` option. ("ruby" platform gem only.) + + +### Deprecated + +* Deprecating Nokogumbo's `Nokogiri::HTML5.get`. This method will be removed in a future version of Nokogiri. + + +### Dependencies + +* [CRuby] Upgrade mini_portile2 dependency from `~> 2.5.0` to `~> 2.6.1`. ("ruby" platform gem only.) + + +## 1.11.7 / 2021-06-02 + +### Fixed + +* [CRuby] Backporting an upstream fix to XPath recursion depth limits which impacted some users of complex XPath queries. This issue is present in libxml 2.9.11 and 2.9.12. [[#2257](https://github.com/sparklemotion/nokogiri/issues/2257)] + + +## 1.11.6 / 2021-05-26 + +### Fixed + +* [CRuby] `DocumentFragment#path` now does proper error-checking to handle behavior introduced in libxml > 2.9.10. In v1.11.4 and v1.11.5, calling `DocumentFragment#path` could result in a segfault. + + +## 1.11.5 / 2021-05-19 + +### Fixed + +[Windows CRuby] Work around segfault at process exit on Windows when using libxml2 system DLLs. 
+ +libxml 2.9.12 introduced new behavior to avoid memory leaks when unloading libxml2 shared libraries (see [libxml/!66](https://gitlab.gnome.org/GNOME/libxml2/-/merge_requests/66)). Early testing caught this segfault on non-Windows platforms (see [#2059](https://github.com/sparklemotion/nokogiri/issues/2059) and [libxml@956534e](https://gitlab.gnome.org/GNOME/libxml2/-/commit/956534e02ef280795a187c16f6ac04e107f23c5d)) but it was incompletely fixed and is still an issue on Windows platforms that are using system DLLs. + +We work around this by configuring libxml2 in this situation to use its default memory management functions. Note that if Nokogiri is not on Windows, or is not using shared system libraries, it will continue to configure libxml2 to use Ruby's memory management functions. `Nokogiri::VERSION_INFO["libxml"]["memory_management"]` will allow you to verify when the default memory management functions are being used. [[#2241](https://github.com/sparklemotion/nokogiri/issues/2241)] + + +### Added + +`Nokogiri::VERSION_INFO["libxml"]` now contains the key `"memory_management"` to declare whether libxml2 is using its `default` memory management functions, or whether it uses the memory management functions from `ruby`. See above for more details. + + +## 1.11.4 / 2021-05-14 + +### Security + +[CRuby] Vendored libxml2 upgraded to v2.9.12 which addresses: + +- [CVE-2019-20388](https://security.archlinux.org/CVE-2019-20388) +- [CVE-2020-24977](https://security.archlinux.org/CVE-2020-24977) +- [CVE-2021-3517](https://security.archlinux.org/CVE-2021-3517) +- [CVE-2021-3518](https://security.archlinux.org/CVE-2021-3518) +- [CVE-2021-3537](https://security.archlinux.org/CVE-2021-3537) +- [CVE-2021-3541](https://security.archlinux.org/CVE-2021-3541) + +Note that two additional CVEs were addressed upstream but are not relevant to this release. 
[CVE-2021-3516](https://security.archlinux.org/CVE-2021-3516) via `xmllint` is not present in Nokogiri, and [CVE-2020-7595](https://security.archlinux.org/CVE-2020-7595) has been patched in Nokogiri since v1.10.8 (see [#1992](https://github.com/sparklemotion/nokogiri/issues/1992)). + +Please see [nokogiri/GHSA-7rrm-v45f-jp64 ](https://github.com/sparklemotion/nokogiri/security/advisories/GHSA-7rrm-v45f-jp64) or [#2233](https://github.com/sparklemotion/nokogiri/issues/2233) for a more complete analysis of these CVEs and patches. + + +### Dependencies + +* [CRuby] vendored libxml2 is updated from 2.9.10 to 2.9.12. (Note that 2.9.11 was skipped because it was superseded by 2.9.12 a few hours after its release.) + + ## 1.11.3 / 2021-04-07 ### Fixed @@ -26,7 +412,7 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA ### Improved -* Reduce the number of object allocations needed when parsing an HTML::DocumentFragment. [[#2087](https://github.com/sparklemotion/nokogiri/issues/2087)] (Thanks, [@ashmaroli](https://github.com/ashmaroli)!) +* Reduce the number of object allocations needed when parsing an `HTML::DocumentFragment`. [[#2087](https://github.com/sparklemotion/nokogiri/issues/2087)] (Thanks, [@ashmaroli](https://github.com/ashmaroli)!) * [JRuby] Update the algorithm used to calculate `Node#line` to be wrong less-often. The underlying parser, Xerces, does not track line numbers, and so we've always used a hacky solution for this method. [[#1223](https://github.com/sparklemotion/nokogiri/issues/1223), [#2177](https://github.com/sparklemotion/nokogiri/issues/2177)] * Introduce `--enable-system-libraries` and `--disable-system-libraries` flags to `extconf.rb`. These flags provide the same functionality as `--use-system-libraries` and the `NOKOGIRI_USE_SYSTEM_LIBRARIES` environment variable, but are more idiomatic. [[#2193](https://github.com/sparklemotion/nokogiri/issues/2193)] (Thanks, [@eregon](https://github.com/eregon)!) 
* [TruffleRuby] `--disable-static` is now the default on TruffleRuby when the packaged libraries are used. This is more flexible and compiles faster. (Note, though, that the default on TR is still to use system libraries.) [[#2191](https://github.com/sparklemotion/nokogiri/issues/2191#issuecomment-780724627), [#2193](https://github.com/sparklemotion/nokogiri/issues/2193)] (Thanks, [@eregon](https://github.com/eregon)!) @@ -108,7 +494,7 @@ See note below about CVE-2020-26247 in the "Changed" subsection entitled "XML::S ### Improved * [CRuby] Handle incorrectly-closed HTML comments as WHATWG recommends for browsers. [[#2058](https://github.com/sparklemotion/nokogiri/issues/2058)] (Thanks to HackerOne user [mayflower](https://hackerone.com/mayflower?type=user) for reporting this!) -* {HTML,XML}::Document#parse now accept `Pathname` objects. Previously this worked only if the referenced file was less than 4096 bytes long; longer files resulted in undefined behavior because the `read` method would be repeatedly invoked. [[#1821](https://github.com/sparklemotion/nokogiri/issues/1821), [#2110](https://github.com/sparklemotion/nokogiri/issues/2110)] (Thanks, [@doriantaylor](https://github.com/doriantaylor) and [@phokz](https://github.com/phokz)!) +* `{HTML,XML}::Document#parse` now accept `Pathname` objects. Previously this worked only if the referenced file was less than 4096 bytes long; longer files resulted in undefined behavior because the `read` method would be repeatedly invoked. [[#1821](https://github.com/sparklemotion/nokogiri/issues/1821), [#2110](https://github.com/sparklemotion/nokogiri/issues/2110)] (Thanks, [@doriantaylor](https://github.com/doriantaylor) and [@phokz](https://github.com/phokz)!) * [CRuby] Nokogumbo builds faster because it can now use header files provided by Nokogiri. [[#1788](https://github.com/sparklemotion/nokogiri/issues/1788)] (Thanks, [@stevecheckoway](https://github.com/stevecheckoway)!) 
* Add `frozen_string_literal: true` magic comment to all `lib` files. [[#1745](https://github.com/sparklemotion/nokogiri/issues/1745)] (Thanks, [@oniofchaos](https://github.com/oniofchaos)!) * [JRuby] Clean up deprecated calls into JRuby. [[#2027](https://github.com/sparklemotion/nokogiri/issues/2027)] (Thanks, [@headius](https://github.com/headius)!) @@ -120,7 +506,7 @@ See note below about CVE-2020-26247 in the "Changed" subsection entitled "XML::S * The CSS `~=` operator now correctly handles non-space whitespace in the `class` attribute. commit e45dedd * The switch to turn off the CSS-to-XPath cache is now thread-local, rather than being shared mutable state. [[#1935](https://github.com/sparklemotion/nokogiri/issues/1935)] * The Node methods `add_previous_sibling`, `previous=`, `before`, `add_next_sibling`, `next=`, `after`, `replace`, and `swap` now correctly use their parent as the context node for parsing markup. These methods now also raise a `RuntimeError` if they are called on a node with no parent. [[nokogumbo#160](https://github.com/rubys/nokogumbo/issues/160)] -* [JRuby] XML::Schema XSD validation errors are captured in `XML::Schema#errors`. These errors were previously ignored. +* [JRuby] `XML::Schema` XSD validation errors are captured in `XML::Schema#errors`. These errors were previously ignored. * [JRuby] Standardize reading from IO like objects, including StringIO. [[#1888](https://github.com/sparklemotion/nokogiri/issues/1888), [#1897](https://github.com/sparklemotion/nokogiri/issues/1897)] * [JRuby] Fix how custom XPath function namespaces are inferred to be less naive. [[#1890](https://github.com/sparklemotion/nokogiri/issues/1890), [#2148](https://github.com/sparklemotion/nokogiri/issues/2148)] * [JRuby] Clarify exception message when custom XPath functions can't be resolved. 
@@ -312,17 +698,17 @@ This CVE's public notice is [#1915](https://github.com/sparklemotion/nokogiri/is * `XML::Attr#value=` allows HTML node attribute values to be set to either a blank string or an empty boolean attribute. [[#1800](https://github.com/sparklemotion/nokogiri/issues/1800)] * Introduce `XML::Node#wrap` which does what `XML::NodeSet#wrap` has always done, but for a single node. [[#1531](https://github.com/sparklemotion/nokogiri/issues/1531)] (Thanks, [@ethirajsrinivasan](https://github.com/ethirajsrinivasan)!) * [MRI] Improve installation experience on macOS High Sierra (Darwin). [[#1812](https://github.com/sparklemotion/nokogiri/issues/1812), [#1813](https://github.com/sparklemotion/nokogiri/issues/1813)] (Thanks, [@gpakosz](https://github.com/gpakosz) and [@nurse](https://github.com/nurse)!) -* [MRI] Node#dup supports copying a node directly to a new document. See the method documentation for details. -* [MRI] DocumentFragment#dup is now more memory-efficient, avoiding making unnecessary copies. [[#1063](https://github.com/sparklemotion/nokogiri/issues/1063)] -* [JRuby] NodeSet has been rewritten to improve performance! [[#1795](https://github.com/sparklemotion/nokogiri/issues/1795)] +* [MRI] `Node#dup` supports copying a node directly to a new document. See the method documentation for details. +* [MRI] `DocumentFragment#dup` is now more memory-efficient, avoiding making unnecessary copies. [[#1063](https://github.com/sparklemotion/nokogiri/issues/1063)] +* [JRuby] `NodeSet` has been rewritten to improve performance! [[#1795](https://github.com/sparklemotion/nokogiri/issues/1795)] ### Fixed * `NodeSet#each` now returns `self` instead of zero. [[#1822](https://github.com/sparklemotion/nokogiri/issues/1822)] (Thanks, [@olehif](https://github.com/olehif)!) -* [MRI] Address a memory leak when using XML::Builder to create nodes with namespaces. 
[[#1810](https://github.com/sparklemotion/nokogiri/issues/1810)] +* [MRI] Address a memory leak when using `XML::Builder` to create nodes with namespaces. [[#1810](https://github.com/sparklemotion/nokogiri/issues/1810)] * [MRI] Address a memory leak when unparenting a DTD. [[#1784](https://github.com/sparklemotion/nokogiri/issues/1784)] (Thanks, [@stevecheckoway](https://github.com/stevecheckoway)!) -* [MRI] Use RbConfig::CONFIG instead of ::MAKEFILE_CONFIG to fix installations that use Makefile macros. [[#1820](https://github.com/sparklemotion/nokogiri/issues/1820)] (Thanks, [@nobu](https://github.com/nobu)!) +* [MRI] Use `RbConfig::CONFIG` instead of `::MAKEFILE_CONFIG` to fix installations that use Makefile macros. [[#1820](https://github.com/sparklemotion/nokogiri/issues/1820)] (Thanks, [@nobu](https://github.com/nobu)!) * [JRuby] Decrease large memory usage when making nested XPath queries. [[#1749](https://github.com/sparklemotion/nokogiri/issues/1749)] * [JRuby] Fix failing tests on JRuby 9.2.x * [JRuby] Fix default namespaces in nodes reparented into a different document [[#1774](https://github.com/sparklemotion/nokogiri/issues/1774)] @@ -385,10 +771,10 @@ If you're offended by what happened here, I'd kindly ask that you comment on the ### Added -* Node#classes, #add_class, #append_class, and #remove_class are added. -* NodeSet#append_class is added. -* NodeSet#remove_attribute is a new alias for NodeSet#remove_attr. -* NodeSet#each now returns an Enumerator when no block is passed (Thanks, [@park53kr](https://github.com/park53kr)!) +* `Node#classes`, `#add_class`, `#append_class`, and `#remove_class` are added. +* `NodeSet#append_class` is added. +* `NodeSet#remove_attribute` is a new alias for `NodeSet#remove_attr`. +* `NodeSet#each` now returns an `Enumerator` when no block is passed (Thanks, [@park53kr](https://github.com/park53kr)!) * [JRuby] General improvements in JRuby implementation (Thanks, [@kares](https://github.com/kares)!) 
@@ -397,7 +783,7 @@ If you're offended by what happened here, I'd kindly ask that you comment on the * CSS attribute selectors now gracefully handle queries using integers. [[#711](https://github.com/sparklemotion/nokogiri/issues/711)] * Handle ASCII-8BIT encoding on fragment input [[#553](https://github.com/sparklemotion/nokogiri/issues/553)] * Handle non-string return values within `Reader` [[#898](https://github.com/sparklemotion/nokogiri/issues/898)] -* [JRuby] Allow Node#replace to insert Comment and CDATA nodes. [[#1666](https://github.com/sparklemotion/nokogiri/issues/1666)] +* [JRuby] Allow `Node#replace` to insert Comment and CDATA nodes. [[#1666](https://github.com/sparklemotion/nokogiri/issues/1666)] * [JRuby] Stability and speed improvements to `Node`, `Sax::PushParser`, and the JRuby implementation [[#1708](https://github.com/sparklemotion/nokogiri/issues/1708), [#1710](https://github.com/sparklemotion/nokogiri/issues/1710), [#1501](https://github.com/sparklemotion/nokogiri/issues/1501)] @@ -422,7 +808,7 @@ If you're offended by what happened here, I'd kindly ask that you comment on the ### Fixed -* Node#serialize once again returns UTF-8-encoded strings. [[#1659](https://github.com/sparklemotion/nokogiri/issues/1659)] +* `Node#serialize` once again returns UTF-8-encoded strings. [[#1659](https://github.com/sparklemotion/nokogiri/issues/1659)] * [JRuby] made SAX parsing of characters consistent with C implementation [[#1676](https://github.com/sparklemotion/nokogiri/issues/1676)] (Thanks, [[@andrew](https://github.com/andrew)-aladev](https://github.com/andrew-aladev)!) * [MRI] Predefined entities, when inspected, no longer cause a segfault. 
[[#1238](https://github.com/sparklemotion/nokogiri/issues/1238)] @@ -439,7 +825,7 @@ If you're offended by what happened here, I'd kindly ask that you comment on the ### Fixed -* NodeSet#first with an integer argument longer than the length of the NodeSet now correctly clamps the length of the returned NodeSet to the original length. [[#1650](https://github.com/sparklemotion/nokogiri/issues/1650)] (Thanks, [@Derenge](https://github.com/Derenge)!) +* `NodeSet#first` with an integer argument longer than the length of the `NodeSet` now correctly clamps the length of the returned `NodeSet` to the original length. [[#1650](https://github.com/sparklemotion/nokogiri/issues/1650)] (Thanks, [@Derenge](https://github.com/Derenge)!) * [MRI] Ensure CData.new raises TypeError if the `content` argument is not implicitly convertible into a string. [[#1669](https://github.com/sparklemotion/nokogiri/issues/1669)] @@ -463,10 +849,10 @@ Please note that this deprecation note only applies to the precompiled Windows g ### Added -* NodeSet#clone is now an alias for NodeSet#dup [[#1503](https://github.com/sparklemotion/nokogiri/issues/1503)] (Thanks, [@stephankaag](https://github.com/stephankaag)!) +* `NodeSet#clone` is now an alias for `NodeSet#dup` [[#1503](https://github.com/sparklemotion/nokogiri/issues/1503)] (Thanks, [@stephankaag](https://github.com/stephankaag)!) * Allow Processing Instructions and Comments as children of a document root. [[#1033](https://github.com/sparklemotion/nokogiri/issues/1033)] (Thanks, [@windwiny](https://github.com/windwiny)!) -* [MRI] PushParser#replace_entities and #replace_entities= will control whether entities are replaced or not. [[#1017](https://github.com/sparklemotion/nokogiri/issues/1017)] (Thanks, [@spraints](https://github.com/spraints)!) -* [MRI] SyntaxError#to_s now includes line number, column number, and log level if made available by the parser. 
[[#1304](https://github.com/sparklemotion/nokogiri/issues/1304), [#1637](https://github.com/sparklemotion/nokogiri/issues/1637)] (Thanks, [@spk](https://github.com/spk) and [@ccarruitero](https://github.com/ccarruitero)!) +* [MRI] `PushParser#replace_entities` and `#replace_entities=` will control whether entities are replaced or not. [[#1017](https://github.com/sparklemotion/nokogiri/issues/1017)] (Thanks, [@spraints](https://github.com/spraints)!) +* [MRI] `SyntaxError#to_s` now includes line number, column number, and log level if made available by the parser. [[#1304](https://github.com/sparklemotion/nokogiri/issues/1304), [#1637](https://github.com/sparklemotion/nokogiri/issues/1637)] (Thanks, [@spk](https://github.com/spk) and [@ccarruitero](https://github.com/ccarruitero)!) * [MRI] Cross-built Windows gems now support Ruby 2.4 * [MRI] Support for frozen string literals. [[#1413](https://github.com/sparklemotion/nokogiri/issues/1413)] * [MRI] Support for installing Nokogiri on a machine in FIPS-enabled mode [[#1544](https://github.com/sparklemotion/nokogiri/issues/1544)] @@ -478,16 +864,16 @@ Please note that this deprecation note only applies to the precompiled Windows g ### Fixed -* HTML::SAX::Parser#parse_io now correctly parses HTML and not XML [[#1577](https://github.com/sparklemotion/nokogiri/issues/1577)] (Thanks for the test case, [@gregors](https://github.com/gregors)!) +* `HTML::SAX::Parser#parse_io` now correctly parses HTML and not XML [[#1577](https://github.com/sparklemotion/nokogiri/issues/1577)] (Thanks for the test case, [@gregors](https://github.com/gregors)!) * Support installation on systems with a `lib64` site config. [[#1562](https://github.com/sparklemotion/nokogiri/issues/1562)] * [MRI] on OpenBSD, do not require gcc if using system libraries [[#1515](https://github.com/sparklemotion/nokogiri/issues/1515)] (Thanks, [@jeremyevans](https://github.com/jeremyevans)!) -* [MRI] XML::Attr.new checks type of Document arg to prevent segfaults. 
[[#1477](https://github.com/sparklemotion/nokogiri/issues/1477)] +* [MRI] `XML::Attr.new` checks type of Document arg to prevent segfaults. [[#1477](https://github.com/sparklemotion/nokogiri/issues/1477)] * [MRI] Prefer xmlCharStrdup (and friends) to strdup (and friends), which can cause problems on some platforms. [[#1517](https://github.com/sparklemotion/nokogiri/issues/1517)] (Thanks, [@jeremy](https://github.com/jeremy)!) * [JRuby] correctly append a text node before another text node [[#1318](https://github.com/sparklemotion/nokogiri/issues/1318)] (Thanks, [@jkraemer](https://github.com/jkraemer)!) * [JRuby] custom xpath functions returning an integer now work correctly [[#1595](https://github.com/sparklemotion/nokogiri/issues/1595)] (Thanks, [@kares](https://github.com/kares)!) * [JRuby] serializing (`#to_html`, `#to_s`, et al) a document with explicit encoding now works correctly. [[#1281](https://github.com/sparklemotion/nokogiri/issues/1281), [#1440](https://github.com/sparklemotion/nokogiri/issues/1440)] (Thanks, [@kares](https://github.com/kares)!) -* [JRuby] XML::Reader now returns parse errors [[#1586](https://github.com/sparklemotion/nokogiri/issues/1586)] (Thanks, [@kares](https://github.com/kares)!) -* [JRuby] Empty NodeSets are now decorated properly. [[#1319](https://github.com/sparklemotion/nokogiri/issues/1319)] (Thanks, [@kares](https://github.com/kares)!) +* [JRuby] `XML::Reader` now returns parse errors [[#1586](https://github.com/sparklemotion/nokogiri/issues/1586)] (Thanks, [@kares](https://github.com/kares)!) +* [JRuby] Empty `NodeSet`s are now decorated properly. [[#1319](https://github.com/sparklemotion/nokogiri/issues/1319)] (Thanks, [@kares](https://github.com/kares)!) * [JRuby] Merged nodes no longer results in Java exceptions during XPath queries. [[#1320](https://github.com/sparklemotion/nokogiri/issues/1320)] (Thanks, [@kares](https://github.com/kares)!) 
@@ -582,10 +968,10 @@ See this libxslt email post for more: Several changes were made to improve performance: -* [MRI] Simplify NodeSet#to_a with a minor speed-up. ([#1397](https://github.com/sparklemotion/nokogiri/issues/1397)) -* XML::Node#ancestors optimization. ([#1297](https://github.com/sparklemotion/nokogiri/issues/1297)) (Thanks, Bruno Sutic!) -* Use Symbol#to_proc where we weren't previously. ([#1296](https://github.com/sparklemotion/nokogiri/issues/1296)) (Thanks, Bruno Sutic!) -* XML::DTD#each uses implicit block calls. (Thanks, [@glaucocustodio](https://github.com/glaucocustodio)!) +* [MRI] Simplify `NodeSet#to_a` with a minor speed-up. ([#1397](https://github.com/sparklemotion/nokogiri/issues/1397)) +* `XML::Node#ancestors` optimization. ([#1297](https://github.com/sparklemotion/nokogiri/issues/1297)) (Thanks, Bruno Sutic!) +* Use `Symbol#to_proc` where we weren't previously. ([#1296](https://github.com/sparklemotion/nokogiri/issues/1296)) (Thanks, Bruno Sutic!) +* `XML::DTD#each` uses implicit block calls. (Thanks, [@glaucocustodio](https://github.com/glaucocustodio)!) * Fall back to the `pkg-config` gem if we're having trouble finding the system libxml2. This should help many FreeBSD users. ([#1417](https://github.com/sparklemotion/nokogiri/issues/1417)) * Set document encoding appropriately even on blank document. ([#1043](https://github.com/sparklemotion/nokogiri/issues/1043)) (Thanks, [@batter](https://github.com/batter)!) @@ -594,7 +980,7 @@ Several changes were made to improve performance: * [JRuby] fix slow add_child ([#692](https://github.com/sparklemotion/nokogiri/issues/692)) * [JRuby] fix load errors when deploying to JRuby/Torquebox ([#1114](https://github.com/sparklemotion/nokogiri/issues/1114)) (Thanks, [@atambo](https://github.com/atambo) and [@jvshahid](https://github.com/jvshahid)!) 
-* [JRuby] fix NPE when inspecting nodes returned by NodeSet#drop ([#1042](https://github.com/sparklemotion/nokogiri/issues/1042)) (Thanks, [@mkristian](https://github.com/mkristian)!) +* [JRuby] fix NPE when inspecting nodes returned by `NodeSet#drop` ([#1042](https://github.com/sparklemotion/nokogiri/issues/1042)) (Thanks, [@mkristian](https://github.com/mkristian)!) * [JRuby] fix nil attriubte node's namespace in reader ([#1327](https://github.com/sparklemotion/nokogiri/issues/1327)) (Thanks, [@codekitchen](https://github.com/codekitchen)!) * [JRuby] fix Nokogiri munging unicode characters that require more than 2 bytes ([#1113](https://github.com/sparklemotion/nokogiri/issues/1113)) (Thanks, [@mkristian](https://github.com/mkristian)!) * [JRuby] allow unlinking an unparented node ([#1112](https://github.com/sparklemotion/nokogiri/issues/1112), [#1152](https://github.com/sparklemotion/nokogiri/issues/1152)) (Thanks, [@esse](https://github.com/esse)!) @@ -605,7 +991,7 @@ Several changes were made to improve performance: * [MRI] Ensure C strings are null-terminated. ([#1381](https://github.com/sparklemotion/nokogiri/issues/1381)) * [MRI] Ensure Rubygems is loaded before using mini_portile2 at installation. ([#1393](https://github.com/sparklemotion/nokogiri/issues/1393), [#1411](https://github.com/sparklemotion/nokogiri/issues/1411)) (Thanks, [@JonRowe](https://github.com/JonRowe)!) * [MRI] Handling another edge case where the `libxml-ruby` gem's global callbacks were smashing the heap. ([#1426](https://github.com/sparklemotion/nokogiri/issues/1426)). (Thanks to [@bbergstrom](https://github.com/bbergstrom) for providing an isolated test case!) -* [MRI] Ensure encodings are passed to Sax::Parser xmldecl callback. ([#844](https://github.com/sparklemotion/nokogiri/issues/844)) +* [MRI] Ensure encodings are passed to `Sax::Parser` xmldecl callback. 
([#844](https://github.com/sparklemotion/nokogiri/issues/844)) * [MRI] Ensure default ns prefix is applied correctly when reparenting nodes to another document. ([#391](https://github.com/sparklemotion/nokogiri/issues/391)) (Thanks, [@ylecuyer](https://github.com/ylecuyer)!) * [MRI] Ensure Reader handles non-existent attributes as expected. ([#1254](https://github.com/sparklemotion/nokogiri/issues/1254)) (Thanks, [@ccutrer](https://github.com/ccutrer)!) * [MRI] Cleanup around namespace handling when reparenting nodes. ([#1332](https://github.com/sparklemotion/nokogiri/issues/1332), [#1333](https://github.com/sparklemotion/nokogiri/issues/1333), [#1444](https://github.com/sparklemotion/nokogiri/issues/1444)) (Thanks, [@cuttrer](https://github.com/cuttrer) and [@bradleybeddoes](https://github.com/bradleybeddoes)!) @@ -679,7 +1065,7 @@ See [#1374](https://github.com/sparklemotion/nokogiri/issues/1374) and [#1376](h ### Fixed * [JRuby] reset the namespace cache when replacing the document's innerHtml ([#1265](https://github.com/sparklemotion/nokogiri/issues/1265)) (Thanks, [@mkristian](https://github.com/mkristian)!) -* [JRuby] Document#parse should support IO objects that respond to #read. ([#1124](https://github.com/sparklemotion/nokogiri/issues/1124)) (Thanks, Jake Byman!) +* [JRuby] `Document#parse` should support IO objects that respond to `#read`. ([#1124](https://github.com/sparklemotion/nokogiri/issues/1124)) (Thanks, Jake Byman!) * [MRI] Duplicate-id errors when setting the `id` attribute on HTML documents are now silenced. ([#1262](https://github.com/sparklemotion/nokogiri/issues/1262)) * [JRuby] SAX parser cuts texts in pieces when square brackets exist. ([#1261](https://github.com/sparklemotion/nokogiri/issues/1261)) * [JRuby] Namespaced attributes aren't removed by remove_attribute. ([#1299](https://github.com/sparklemotion/nokogiri/issues/1299)) @@ -723,19 +1109,19 @@ Note that 1.6.6.0 was not released. 
### Added -* Unified Node and NodeSet implementations of #search, #xpath and #css. -* Added Node#lang and Node#lang=. -* bin/nokogiri passes the URI to parse() if an HTTP URL is given. -* bin/nokogiri now loads ~/.nokogirirc so user can define helper methods, etc. -* bin/nokogiri can be configured to use Pry instead of IRB by adding a couple of lines to ~/.nokogirirc. ([#1198](https://github.com/sparklemotion/nokogiri/issues/1198)) -* bin/nokogiri can better handle urls from STDIN (aiding use of xargs). ([#1065](https://github.com/sparklemotion/nokogiri/issues/1065)) +* Unified `Node` and `NodeSet` implementations of `#search`, `#xpath` and `#css`. +* Added `Node#lang` and `Node#lang=`. +* `bin/nokogiri` passes the URI to `parse()` if an HTTP URL is given. +* `bin/nokogiri` now loads `~/.nokogirirc` so user can define helper methods, etc. +* `bin/nokogiri` can be configured to use Pry instead of IRB by adding a couple of lines to `~/.nokogirirc`. ([#1198](https://github.com/sparklemotion/nokogiri/issues/1198)) +* `bin/nokogiri` can better handle urls from STDIN (aiding use of xargs). ([#1065](https://github.com/sparklemotion/nokogiri/issues/1065)) * JRuby 9K support. ### Fixed -* DocumentFragment#search now matches against root nodes. ([#1205](https://github.com/sparklemotion/nokogiri/issues/1205)) -* (MRI) More fixes related to handling libxml2 parse errors during DocumentFragment#dup. ([#1196](https://github.com/sparklemotion/nokogiri/issues/1196)) +* `DocumentFragment#search` now matches against root nodes. ([#1205](https://github.com/sparklemotion/nokogiri/issues/1205)) +* (MRI) More fixes related to handling libxml2 parse errors during `DocumentFragment#dup`. ([#1196](https://github.com/sparklemotion/nokogiri/issues/1196)) * (JRuby) Builder now handles namespace hrefs properly when there is a default ns. ([#1039](https://github.com/sparklemotion/nokogiri/issues/1039)) * (JRuby) Clear the XPath cache on attr removal. 
([#1109](https://github.com/sparklemotion/nokogiri/issues/1109)) * `XML::Comment.new` argument types are now consistent and safe (and documented) across MRI and JRuby. ([#1224](https://github.com/sparklemotion/nokogiri/issues/1224)) @@ -748,14 +1134,14 @@ Note that 1.6.6.0 was not released. ### Added -* Implement Slop#respond_to_missing?. ([#1176](https://github.com/sparklemotion/nokogiri/issues/1176)) +* Implement `Slop#respond_to_missing?`. ([#1176](https://github.com/sparklemotion/nokogiri/issues/1176)) * Optimized the XPath query generated by an `an+b` CSS query. ### Fixed -* Capture non-parse errors from Document#dup in Document#errors. ([#1196](https://github.com/sparklemotion/nokogiri/issues/1196)) -* (JRuby) Document#canonicalize parameters are now consistent with MRI. ([#1189](https://github.com/sparklemotion/nokogiri/issues/1189)) +* Capture non-parse errors from `Document#dup` in `Document#errors`. ([#1196](https://github.com/sparklemotion/nokogiri/issues/1196)) +* (JRuby) `Document#canonicalize` parameters are now consistent with MRI. ([#1189](https://github.com/sparklemotion/nokogiri/issues/1189)) ## 1.6.4.1 / 2014-11-05 @@ -778,7 +1164,7 @@ Note that 1.6.6.0 was not released. ### Fixed -* (MRI) Fix DocumentFragment#element_children ([#1138](https://github.com/sparklemotion/nokogiri/issues/1138)). +* (MRI) Fix `DocumentFragment#element_children` ([#1138](https://github.com/sparklemotion/nokogiri/issues/1138)). * Fix a bug with CSS attribute selector without any prefix where "foo [bar]" was treated as "foo[bar]". ([#1174](https://github.com/sparklemotion/nokogiri/issues/1174)) @@ -793,7 +1179,7 @@ Note that 1.6.6.0 was not released. ### Added -* Added Node#document? and Node#processing_instruction? +* Added `Node#document?` and `Node#processing_instruction?` ### Fixed @@ -801,8 +1187,8 @@ Note that 1.6.6.0 was not released. * [JRuby] Fix Ruby memory exhaustion vulnerability. 
[#1087](https://github.com/sparklemotion/nokogiri/issues/1087) (Thanks, [@ocher](https://github.com/ocher)) * [MRI] Fix segfault during GC when using `libxml-ruby` and `nokogiri` together in multi-threaded environment. [#895](https://github.com/sparklemotion/nokogiri/issues/895) (Thanks, [@ender672](https://github.com/ender672)!) * Building on OSX 10.9 stock ruby 2.0.0 now works. [#1101](https://github.com/sparklemotion/nokogiri/issues/1101) (Thanks, [@zenspider](https://github.com/zenspider)!) -* Node#parse now works again for HTML document nodes (broken in 1.6.2+). -* Processing instructions can now be added via Node#add_next_sibling. +* `Node#parse` now works again for HTML document nodes (broken in 1.6.2+). +* Processing instructions can now be added via `Node#add_next_sibling`. ## 1.6.2.1 / 2014-05-13 @@ -834,31 +1220,31 @@ Now requires libxml >= 2.6.21 (was previously >= 2.6.17). * Add cross building of fat binary gems for 64-Bit Windows (x64-mingw32) and add support for native builds on Windows. [#864](https://github.com/sparklemotion/nokogiri/issues/864), [#989](https://github.com/sparklemotion/nokogiri/issues/989), [#1072](https://github.com/sparklemotion/nokogiri/issues/1072) * (MRI) Alias CP932 to Windows-31J if iconv does not support Windows-31J. -* (MRI) Nokogiri now links packaged libraries statically. To disable static linking, pass --disable-static to extconf.rb. [#923](https://github.com/sparklemotion/nokogiri/issues/923) +* (MRI) Nokogiri now links packaged libraries statically. To disable static linking, pass --disable-static to `extconf.rb`. [#923](https://github.com/sparklemotion/nokogiri/issues/923) * (MRI) Fix a library path (LIBPATH) precedence problem caused by CRuby bug [#9760](https://github.com/sparklemotion/nokogiri/issues/9760). -* (MRI) Nokogiri automatically deletes directories of packaged libraries only used during build. To keep them for debugging purposes, pass --disable-clean to extconf.rb. 
[#952](https://github.com/sparklemotion/nokogiri/issues/952) +* (MRI) Nokogiri automatically deletes directories of packaged libraries only used during build. To keep them for debugging purposes, pass --disable-clean to `extconf.rb`. [#952](https://github.com/sparklemotion/nokogiri/issues/952) * (MRI) Nokogiri now builds libxml2 properly with iconv support on platforms where libiconv is installed outside the system default directories, such as FreeBSD. * Add support for an-b in nth selectors. [#886](https://github.com/sparklemotion/nokogiri/issues/886) (Thanks, Magnus Bergmark!) -* Add support for bare and multiple :not() functions in selectors. [#887](https://github.com/sparklemotion/nokogiri/issues/887) (Thanks, Magnus Bergmark!) -* (MRI) Add an extconf.rb option --use-system-libraries, alternative to setting the environment variable NOKOGIRI_USE_SYSTEM_LIBRARIES. +* Add support for bare and multiple `:not()` functions in selectors. [#887](https://github.com/sparklemotion/nokogiri/issues/887) (Thanks, Magnus Bergmark!) +* (MRI) Add an `extconf.rb` option --use-system-libraries, alternative to setting the environment variable NOKOGIRI_USE_SYSTEM_LIBRARIES. * (MRI) Update packaged libraries: libxslt to 1.1.28, zlib to 1.2.8, and libiconv to 1.14, respectively. -* Nokogiri::HTML::Document#title= and #meta_encoding= now always add an element if not present, trying hard to find the best place to put it. -* Nokogiri::XML::DTD#html_dtd? and #html5_dtd? are added. -* Nokogiri::XML::Node#prepend_child is added. [#664](https://github.com/sparklemotion/nokogiri/issues/664) -* Nokogiri::XML::SAX::ParserContext#recovery is added. [#453](https://github.com/sparklemotion/nokogiri/issues/453) -* Fix documentation for XML::Node#namespace. [#803](https://github.com/sparklemotion/nokogiri/issues/803) [#802](https://github.com/sparklemotion/nokogiri/issues/802) (Thanks, Hoylen Sue) -* Allow Nokogiri::XML::Node#parse from unparented non-element nodes. 
[#407](https://github.com/sparklemotion/nokogiri/issues/407) +* `Nokogiri::HTML::Document#title=` and `#meta_encoding=` now always add an element if not present, trying hard to find the best place to put it. +* `Nokogiri::XML::DTD#html_dtd?` and `#html5_dtd?` are added. +* `Nokogiri::XML::Node#prepend_child` is added. [#664](https://github.com/sparklemotion/nokogiri/issues/664) +* `Nokogiri::XML::SAX::ParserContext#recovery` is added. [#453](https://github.com/sparklemotion/nokogiri/issues/453) +* Fix documentation for `XML::Node#namespace`. [#803](https://github.com/sparklemotion/nokogiri/issues/803) [#802](https://github.com/sparklemotion/nokogiri/issues/802) (Thanks, Hoylen Sue) +* Allow `Nokogiri::XML::Node#parse` from unparented non-element nodes. [#407](https://github.com/sparklemotion/nokogiri/issues/407) ### Fixed * Ensure :only-child pseudo class works within :not pseudo class. [#858](https://github.com/sparklemotion/nokogiri/issues/858) (Thanks, Yamagishi Kazutoshi!) -* Don't call pkg_config when using bundled libraries in extconf.rb [#931](https://github.com/sparklemotion/nokogiri/issues/931) (Thanks, Shota Fukumori!) -* Nokogiri.parse() does not mistake a non-HTML document like a RSS document as HTML document. [#932](https://github.com/sparklemotion/nokogiri/issues/932) (Thanks, Yamagishi Kazutoshi!) +* Don't call pkg_config when using bundled libraries in `extconf.rb` [#931](https://github.com/sparklemotion/nokogiri/issues/931) (Thanks, Shota Fukumori!) +* `Nokogiri.parse()` does not mistake a non-HTML document like a RSS document as HTML document. [#932](https://github.com/sparklemotion/nokogiri/issues/932) (Thanks, Yamagishi Kazutoshi!) * (MRI) Perform a node type check before adding a child node to another. Previously adding a text node to another as a child could cause a SEGV. [#1092](https://github.com/sparklemotion/nokogiri/issues/1092) * (JRuby) XSD validation crashes in Java version. 
[#373](https://github.com/sparklemotion/nokogiri/issues/373) * (JRuby) Document already has a root node error while using Builder. [#646](https://github.com/sparklemotion/nokogiri/issues/646) * (JRuby) c14n tests are all passing on JRuby. [#226](https://github.com/sparklemotion/nokogiri/issues/226) -* Parsing empty documents raise SyntaxError in strict mode. [#1005](https://github.com/sparklemotion/nokogiri/issues/1005) +* Parsing empty documents raises `SyntaxError` in strict mode. [#1005](https://github.com/sparklemotion/nokogiri/issues/1005) * (JRuby) Make xpath faster by caching the xpath context. [#741](https://github.com/sparklemotion/nokogiri/issues/741) * (JRuby) XML SAX push parser leaks memory on JRuby, but not on MRI. [#998](https://github.com/sparklemotion/nokogiri/issues/998) * (JRuby) Inconsistent behavior aliasing the default namespace. [#940](https://github.com/sparklemotion/nokogiri/issues/940) @@ -947,7 +1333,7 @@ mentioned in the notes for v1.5.10. * (JRuby) Fix EmptyStackException thrown by elements with xlink:href attributes and no base_uri [#534](https://github.com/sparklemotion/nokogiri/issues/534), [#805](https://github.com/sparklemotion/nokogiri/issues/805). (Thanks, Patrick Quinn and Brian Hoffman!) * Fixes duplicate attributes issue introduced in 1.5.7. [#865](https://github.com/sparklemotion/nokogiri/issues/865) -* Allow use of a prefixed namespace on a root node using Nokogiri::XML::Builder [#868](https://github.com/sparklemotion/nokogiri/issues/868) +* Allow use of a prefixed namespace on a root node using `Nokogiri::XML::Builder` [#868](https://github.com/sparklemotion/nokogiri/issues/868) ## 1.5.7 / 2013-03-18 @@ -959,14 +1345,14 @@ mentioned in the notes for v1.5.10. ### Fixed -* SAX::Parser.parse_io throw an error when used with lower case encoding. [#828](https://github.com/sparklemotion/nokogiri/issues/828) +* `SAX::Parser.parse_io` throws an error when used with lower case encoding. 
[#828](https://github.com/sparklemotion/nokogiri/issues/828) * (JRuby) Java Nokogiri is finally green (passes all tests) under 1.8 and 1.9 mode. High five everyone. [#798](https://github.com/sparklemotion/nokogiri/issues/798), [#705](https://github.com/sparklemotion/nokogiri/issues/705) -* (JRuby) Nokogiri::XML::Reader broken (as a pull parser) on jruby - reads the whole XML document. [#831](https://github.com/sparklemotion/nokogiri/issues/831) +* (JRuby) `Nokogiri::XML::Reader` broken (as a pull parser) on jruby - reads the whole XML document. [#831](https://github.com/sparklemotion/nokogiri/issues/831) * (JRuby) JRuby hangs parsing "&". [#837](https://github.com/sparklemotion/nokogiri/issues/837) * (JRuby) JRuby NPE parsing an invalid XML instruction. [#838](https://github.com/sparklemotion/nokogiri/issues/838) -* (JRuby) Node#content= incompatibility. [#839](https://github.com/sparklemotion/nokogiri/issues/839) +* (JRuby) `Node#content=` incompatibility. [#839](https://github.com/sparklemotion/nokogiri/issues/839) * (JRuby) to_xhtml doesn't print the last slash for self-closing tags in JRuby. [#834](https://github.com/sparklemotion/nokogiri/issues/834) -* (JRuby) Adding an EntityReference after a Text node mangles the entity in JRuby. [#835](https://github.com/sparklemotion/nokogiri/issues/835) +* (JRuby) Adding an `EntityReference` after a Text node mangles the entity in JRuby. [#835](https://github.com/sparklemotion/nokogiri/issues/835) * (JRuby) JRuby version inconsistency: nil for empty attributes. [#818](https://github.com/sparklemotion/nokogiri/issues/818) * CSS queries for classes (e.g., ".foo") now treat all whitespace identically. [#854](https://github.com/sparklemotion/nokogiri/issues/854) * Namespace behavior cleaned up and made consistent between JRuby and MRI. [#846](https://github.com/sparklemotion/nokogiri/issues/846), [#801](https://github.com/sparklemotion/nokogiri/issues/801) (Thanks, Michael Klein!) 
@@ -977,14 +1363,14 @@ mentioned in the notes for v1.5.10. ### Added -* Improved performance of XML::Document#collect_namespaces. [#761](https://github.com/sparklemotion/nokogiri/issues/761) (Thanks, Juergen Mangler!) -* New callback SAX::Document#processing_instruction (Thanks, Kitaiti Makoto!) -* Node#native_content= allows setting unescaped node contant. [#768](https://github.com/sparklemotion/nokogiri/issues/768) +* Improved performance of `XML::Document#collect_namespaces`. [#761](https://github.com/sparklemotion/nokogiri/issues/761) (Thanks, Juergen Mangler!) +* New callback `SAX::Document#processing_instruction` (Thanks, Kitaiti Makoto!) +* `Node#native_content=` allows setting unescaped node content. [#768](https://github.com/sparklemotion/nokogiri/issues/768) * XPath lookup with namespaces supports symbol keys. [#729](https://github.com/sparklemotion/nokogiri/issues/729) (Thanks, Ben Langfeld.) -* XML::Node#[]= stringifies values. [#729](https://github.com/sparklemotion/nokogiri/issues/729) (Thanks, Ben Langfeld.) -* bin/nokogiri will process a document from $stdin -* bin/nokogiri -e will execute a program from the command line -* (JRuby) bin/nokogiri --version will print the Xerces and NekoHTML versions. +* `XML::Node#[]=` stringifies values. [#729](https://github.com/sparklemotion/nokogiri/issues/729) (Thanks, Ben Langfeld.) +* `bin/nokogiri` will process a document from $stdin +* `bin/nokogiri -e` will execute a program from the command line +* (JRuby) `bin/nokogiri --version` will print the Xerces and NekoHTML versions. ### Fixed @@ -994,15 +1380,15 @@ mentioned in the notes for v1.5.10. * Raise an ArgumentError if an invalid encoding is passed to the SAX parser. [#756](https://github.com/sparklemotion/nokogiri/issues/756) (Thanks, Bradley Schaefer!) * Prefixed element inconsistency between CRuby and JRuby. [#712](https://github.com/sparklemotion/nokogiri/issues/712) * (JRuby) space prior to xml preamble causes nokogiri to fail parsing. 
(fixed along with [#748](https://github.com/sparklemotion/nokogiri/issues/748)) [#790](https://github.com/sparklemotion/nokogiri/issues/790) -* (JRuby) Fixed the bug Nokogiri::XML::Node#content inconsistency between Java and C. [#794](https://github.com/sparklemotion/nokogiri/issues/794), [#797](https://github.com/sparklemotion/nokogiri/issues/797) +* (JRuby) Fixed the bug `Nokogiri::XML::Node#content` inconsistency between Java and C. [#794](https://github.com/sparklemotion/nokogiri/issues/794), [#797](https://github.com/sparklemotion/nokogiri/issues/797) * (JRuby) raises INVALID_CHARACTER_ERR exception when EntityReference name starts with '#'. [#719](https://github.com/sparklemotion/nokogiri/issues/719) * (JRuby) doesn't coerce namespaces out of strings on a direct subclass of Node. [#715](https://github.com/sparklemotion/nokogiri/issues/715) -* (JRuby) Node#content now renders newlines properly. [#737](https://github.com/sparklemotion/nokogiri/issues/737) (Thanks, Piotr Szmielew!) +* (JRuby) `Node#content` now renders newlines properly. [#737](https://github.com/sparklemotion/nokogiri/issues/737) (Thanks, Piotr Szmielew!) * (JRuby) Unknown namespace are ignore when the recover option is used. [#748](https://github.com/sparklemotion/nokogiri/issues/748) * (JRuby) XPath queries for namespaces should not throw exceptions when called twice in a row. [#764](https://github.com/sparklemotion/nokogiri/issues/764) * (JRuby) More consistent (with libxml2) whitespace formatting when emitting XML. [#771](https://github.com/sparklemotion/nokogiri/issues/771) * (JRuby) namespaced attributes broken when appending raw xml to builder. [#770](https://github.com/sparklemotion/nokogiri/issues/770) -* (JRuby) Nokogiri::XML::Document#wrap raises undefined method `length' for nil:NilClass when trying to << to a node. 
[#781](https://github.com/sparklemotion/nokogiri/issues/781) +* (JRuby) `Nokogiri::XML::Document#wrap` raises undefined method `length' for nil:NilClass when trying to << to a node. [#781](https://github.com/sparklemotion/nokogiri/issues/781) * (JRuby) Fixed "bad file descriptor" bug when closing open file descriptors. [#495](https://github.com/sparklemotion/nokogiri/issues/495) * (JRuby) JRuby/CRuby incompatibility for attribute decorators. [#785](https://github.com/sparklemotion/nokogiri/issues/785) * (JRuby) Issues parsing valid XML with no internal subset in the DTD. [#547](https://github.com/sparklemotion/nokogiri/issues/547), [#811](https://github.com/sparklemotion/nokogiri/issues/811) @@ -1026,10 +1412,10 @@ mentioned in the notes for v1.5.10. * JRuby 1.9 error, uncaught throw 'encoding_found', has been fixed. [#673](https://github.com/sparklemotion/nokogiri/issues/673) * Invalid encoding returned in JRuby with US-ASCII. [#583](https://github.com/sparklemotion/nokogiri/issues/583) * XmlSaxPushParser raises IndexOutOfBoundsException when over 512 characters are given. [#567](https://github.com/sparklemotion/nokogiri/issues/567), [#615](https://github.com/sparklemotion/nokogiri/issues/615) -* When xpath evaluation returns empty NodeSet, decorating NodeSet's base document raises exception. [#514](https://github.com/sparklemotion/nokogiri/issues/514) +* When xpath evaluation returns empty `NodeSet`, decorating `NodeSet`'s base document raises exception. [#514](https://github.com/sparklemotion/nokogiri/issues/514) * JRuby raises exception when xpath with namespace is specified. pull request [#681](https://github.com/sparklemotion/nokogiri/issues/681) (Thanks, Piotr Szmielew) * JRuby renders nodes without their namespace when subclassing Node. [#695](https://github.com/sparklemotion/nokogiri/issues/695) -* JRuby raises NAMESPACE_ERR (org.w3c.dom.DOMException) while instantiating RDF::RDFXML::Writer. 
[#683](https://github.com/sparklemotion/nokogiri/issues/683) +* JRuby raises NAMESPACE_ERR (org.w3c.dom.DOMException) while instantiating `RDF::RDFXML::Writer`. [#683](https://github.com/sparklemotion/nokogiri/issues/683) * JRuby is not able to use namespaces in xpath. [#493](https://github.com/sparklemotion/nokogiri/issues/493) * JRuby's Entity resolving should be consistent with C-Nokogiri [#704](https://github.com/sparklemotion/nokogiri/issues/704), [#647](https://github.com/sparklemotion/nokogiri/issues/647), [#703](https://github.com/sparklemotion/nokogiri/issues/703) @@ -1046,7 +1432,7 @@ mentioned in the notes for v1.5.10. ### Fixed * Segmentation fault when creating a comment node for a DocumentFragment. [#677](https://github.com/sparklemotion/nokogiri/issues/677), [#678](https://github.com/sparklemotion/nokogiri/issues/678). -* Treat '.' as xpath in at() and search(). [#690](https://github.com/sparklemotion/nokogiri/issues/690) +* Treat '.' as xpath in `at()` and `search()`. [#690](https://github.com/sparklemotion/nokogiri/issues/690) ### Security @@ -1071,13 +1457,13 @@ Insert your own joke about double-negatives here. ### Fixed * Custom xpath functions with empty nodeset arguments cause a segfault. [#634](https://github.com/sparklemotion/nokogiri/issues/634). -* Nokogiri::XML::Node#css now works for XML documents with default namespaces when the rule contains attribute selector without namespace. +* `Nokogiri::XML::Node#css` now works for XML documents with default namespaces when the rule contains attribute selector without namespace. * Fixed marshalling bugs around how arguments are passed to (and returned from) XSLT custom xpath functions. [#640](https://github.com/sparklemotion/nokogiri/issues/640). 
-* Nokogiri::XML::Reader#outer_xml is broken in JRuby [#617](https://github.com/sparklemotion/nokogiri/issues/617) -* Nokogiri::XML::Attribute on JRuby returns a nil namespace [#647](https://github.com/sparklemotion/nokogiri/issues/647) -* Nokogiri::XML::Node#namespace= cannot set a namespace without a prefix on JRuby [#648](https://github.com/sparklemotion/nokogiri/issues/648) +* `Nokogiri::XML::Reader#outer_xml` is broken in JRuby [#617](https://github.com/sparklemotion/nokogiri/issues/617) +* `Nokogiri::XML::Attribute` on JRuby returns a nil namespace [#647](https://github.com/sparklemotion/nokogiri/issues/647) +* `Nokogiri::XML::Node#namespace=` cannot set a namespace without a prefix on JRuby [#648](https://github.com/sparklemotion/nokogiri/issues/648) * (JRuby) 1.9 mode causes dead lock while running rake [#571](https://github.com/sparklemotion/nokogiri/issues/571) -* HTML::Document#meta_encoding does not raise exception on docs with malformed content-type. [#655](https://github.com/sparklemotion/nokogiri/issues/655) +* `HTML::Document#meta_encoding` does not raise exception on docs with malformed content-type. [#655](https://github.com/sparklemotion/nokogiri/issues/655) * Fixing segfault related to unsupported encodings in in-context parsing on 1.8.7. [#643](https://github.com/sparklemotion/nokogiri/issues/643) * (JRuby) Concurrency issue in XPath parsing. [#682](https://github.com/sparklemotion/nokogiri/issues/682) @@ -1091,9 +1477,9 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631] ### Added -* XML::Builder#comment allows creation of comment nodes. +* `XML::Builder#comment` allows creation of comment nodes. * CSS searches now support namespaced attributes. [#593](https://github.com/sparklemotion/nokogiri/issues/593) -* Java integration feature is added. Now, XML::Document.wrap and XML::Document#to_java methods are available. +* Java integration feature is added. 
Now, `XML::Document.wrap` and `XML::Document#to_java` methods are available. * RelaxNG validator support in the `nokogiri` cli utility. [#591](https://github.com/sparklemotion/nokogiri/issues/591) (thanks, Dan Radez!) ### Fixed @@ -1101,15 +1487,15 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631] * Fix many memory leaks and segfault opportunities. Thanks, Tim Elliott! * extconf searches homebrew paths if homebrew is installed. * Inconsistent behavior of Nokogiri 1.5.0 Java [#620](https://github.com/sparklemotion/nokogiri/issues/620) -* Inheriting from Nokogiri::XML::Node on JRuby (1.6.4/5) fails [#560](https://github.com/sparklemotion/nokogiri/issues/560) -* XML::Attr nodes are not allowed to be added as node children, so an exception is raised. [#558](https://github.com/sparklemotion/nokogiri/issues/558) -* No longer defensively "pickle" adjacent text nodes on Node#add_next_sibling and Node#add_previous_sibling calls. [#595](https://github.com/sparklemotion/nokogiri/issues/595). +* Inheriting from `Nokogiri::XML::Node` on JRuby (1.6.4/5) fails [#560](https://github.com/sparklemotion/nokogiri/issues/560) +* `XML::Attr` nodes are not allowed to be added as node children, so an exception is raised. [#558](https://github.com/sparklemotion/nokogiri/issues/558) +* No longer defensively "pickle" adjacent text nodes on `Node#add_next_sibling` and `Node#add_previous_sibling` calls. [#595](https://github.com/sparklemotion/nokogiri/issues/595). * Java version inconsistency: it returns nil for empty attributes [#589](https://github.com/sparklemotion/nokogiri/issues/589) -* to_xhtml incorrectly generates

when tag is empty [#557](https://github.com/sparklemotion/nokogiri/issues/557) -* Document#add_child now accepts a Node, NodeSet, DocumentFragment, or String. [#546](https://github.com/sparklemotion/nokogiri/issues/546). -* Document#create_element now recognizes namespaces containing non-word characters (like "SOAP-ENV"). This is mostly relevant to users of Builder, which calls Document#create_element for nearly everything. [#531](https://github.com/sparklemotion/nokogiri/issues/531). +* to_xhtml incorrectly generates `

` when tag is empty [#557](https://github.com/sparklemotion/nokogiri/issues/557) +* `Document#add_child` now accepts a `Node`, `NodeSet`, `DocumentFragment`, or `String`. [#546](https://github.com/sparklemotion/nokogiri/issues/546). +* `Document#create_element` now recognizes namespaces containing non-word characters (like "SOAP-ENV"). This is mostly relevant to users of Builder, which calls `Document#create_element` for nearly everything. [#531](https://github.com/sparklemotion/nokogiri/issues/531). * File encoding broken in 1.5.0 / jruby / windows [#529](https://github.com/sparklemotion/nokogiri/issues/529) -* Java version does not return namespace defs as attrs for ::HTML [#542](https://github.com/sparklemotion/nokogiri/issues/542) +* Java version does not return namespace defs as attrs for `::HTML` [#542](https://github.com/sparklemotion/nokogiri/issues/542) * Bad file descriptor with Nokogiri 1.5.0 [#495](https://github.com/sparklemotion/nokogiri/issues/495) * remove_namespace! doesn't work in pure java version [#492](https://github.com/sparklemotion/nokogiri/issues/492) * The Nokogiri Java native build throws a null pointer exception when ActiveSupport's .blank? method is called directly on a parsed object. [#489](https://github.com/sparklemotion/nokogiri/issues/489) @@ -1118,9 +1504,9 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631] * Nokogiri 1.5.0 XML generation broken on JRuby [#484](https://github.com/sparklemotion/nokogiri/issues/484) * Do not allow multiple root nodes. [#550](https://github.com/sparklemotion/nokogiri/issues/550) * Fixes for custom XPath functions. [#605](https://github.com/sparklemotion/nokogiri/issues/605), [#606](https://github.com/sparklemotion/nokogiri/issues/606) (thanks, Juan Wajnerman!) -* Node#to_xml does not override :save_with if it is provided. [#505](https://github.com/sparklemotion/nokogiri/issues/505) -* Node#set is a private method (JRuby). 
[#564](https://github.com/sparklemotion/nokogiri/issues/564) (thanks, Nick Sieger!) -* C14n cleanup and Node#canonicalize (thanks, Ivan Pirlik!) [#563](https://github.com/sparklemotion/nokogiri/issues/563) +* `Node#to_xml` does not override `:save_with` if it is provided. [#505](https://github.com/sparklemotion/nokogiri/issues/505) +* `Node#set` is a private method (JRuby). [#564](https://github.com/sparklemotion/nokogiri/issues/564) (thanks, Nick Sieger!) +* C14n cleanup and `Node#canonicalize` (thanks, Ivan Pirlik!) [#563](https://github.com/sparklemotion/nokogiri/issues/563) ## 1.5.0 / 2011-07-01 @@ -1131,14 +1517,14 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631] ### Added -* extracted sets of Node::SaveOptions into Node::SaveOptions::DEFAULT_{X,H,XH}TML (refactor) +* extracted sets of `Node::SaveOptions` into `Node::SaveOptions::DEFAULT_{X,H,XH}TML` (refactor) ### Fixed * default output of XML on JRuby is no longer formatted due to inconsistent whitespace handling. [#415](https://github.com/sparklemotion/nokogiri/issues/415) -* (JRuby) making empty NodeSets with null `nodes` member safe to operate on. [#443](https://github.com/sparklemotion/nokogiri/issues/443) +* (JRuby) making empty `NodeSet`s with null `nodes` member safe to operate on. [#443](https://github.com/sparklemotion/nokogiri/issues/443) * Fix a bug in advanced encoding detection that leads to partially duplicated document when parsing an HTML file with unknown encoding. -* Add support for . +* Add support for ``. ## 1.5.0 beta3 / 2010-12-02 @@ -1150,7 +1536,7 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631] ### Fixed -* Node#inner_text no longer returns nil. (JRuby) [#264](https://github.com/sparklemotion/nokogiri/issues/264) +* `Node#inner_text` no longer returns nil. 
(JRuby) [#264](https://github.com/sparklemotion/nokogiri/issues/264) ## 1.5.0 beta2 / 2010-07-30 @@ -1195,68 +1581,68 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631] ### Added -* Nokogiri::HTML::Document#title accessor gets and sets the document title. -* extracted sets of Node::SaveOptions into Node::SaveOptions::DEFAULT_{X,H,XH}TML (refactor) -* Raise an exception if a string is passed to Nokogiri::XML::Schema#validate. [#406](https://github.com/sparklemotion/nokogiri/issues/406) +* `Nokogiri::HTML::Document#title` accessor gets and sets the document title. +* extracted sets of `Node::SaveOptions` into `Node::SaveOptions::DEFAULT_{X,H,XH}TML` (refactor) +* Raise an exception if a string is passed to `Nokogiri::XML::Schema#validate`. [#406](https://github.com/sparklemotion/nokogiri/issues/406) ### Fixed -* Node#serialize-and-friends now accepts a SaveOption object as the, erm, save object. -* Nokogiri::CSS::Parser has-a Nokogiri::CSS::Tokenizer +* `Node#serialize`-and-friends now accepts a `SaveOption` object as the, erm, save object. +* `Nokogiri::CSS::Parser` has-a `Nokogiri::CSS::Tokenizer` * (JRUBY+FFI only) Weak references are now threadsafe. [#355](https://github.com/sparklemotion/nokogiri/issues/355) -* Make direct start_element() callback (currently used for HTML::SAX::Parser) pass attributes in assoc array, just as emulated start_element() callback does. rel. [#356](https://github.com/sparklemotion/nokogiri/issues/356) -* HTML::SAX::Parser should call back a block given to parse*() if any, just as XML::SAX::Parser does. +* Make direct `start_element()` callback (currently used for `HTML::SAX::Parser`) pass attributes in assoc array, just as emulated `start_element()` callback does. rel. [#356](https://github.com/sparklemotion/nokogiri/issues/356) +* `HTML::SAX::Parser` should call back a block given to `parse*()` if any, just as `XML::SAX::Parser` does. 
* Add further encoding detection to HTML parser that libxml2 does not do. -* Document#remove_namespaces! now handles attributes with namespaces. [#396](https://github.com/sparklemotion/nokogiri/issues/396) -* XSLT::Stylesheet#transform no longer segfaults when handed a non-XML::Document. [#452](https://github.com/sparklemotion/nokogiri/issues/452) -* XML::Reader no longer segfaults when under GC pressure. [#439](https://github.com/sparklemotion/nokogiri/issues/439) +* `Document#remove_namespaces!` now handles attributes with namespaces. [#396](https://github.com/sparklemotion/nokogiri/issues/396) +* `XSLT::Stylesheet#transform` no longer segfaults when handed a non-`XML::Document`. [#452](https://github.com/sparklemotion/nokogiri/issues/452) +* `XML::Reader` no longer segfaults when under GC pressure. [#439](https://github.com/sparklemotion/nokogiri/issues/439) ## 1.4.4 / 2010-11-15 ### Added -* XML::Node#children= sets the node's inner html (much like #inner_html=), but returns the reparent node(s). +* `XML::Node#children=` sets the node's inner html (much like #inner_html=), but returns the reparent node(s). * XSLT supports function extensions. [#336](https://github.com/sparklemotion/nokogiri/issues/336) * XPath bind parameter substitution. [#329](https://github.com/sparklemotion/nokogiri/issues/329) -* XML::Reader node type constants. [#369](https://github.com/sparklemotion/nokogiri/issues/369) +* `XML::Reader` node type constants. [#369](https://github.com/sparklemotion/nokogiri/issues/369) * SAX Parser context provides line and column information ### Fixed -* XML::DTD#attributes returns an empty hash instead of nil when there are no attributes. -* XML::DTD#{keys,each} now work as expected. [#324](https://github.com/sparklemotion/nokogiri/issues/324) -* {XML,HTML}::DocumentFragment.{new,parse} no longer strip leading and trailing whitespace. 
[#319](https://github.com/sparklemotion/nokogiri/issues/319) -* XML::Node#{add_child,add_previous_sibling,add_next_sibling,replace} return a NodeSet when passed a string. +* `XML::DTD#attributes` returns an empty hash instead of nil when there are no attributes. +* `XML::DTD#{keys,each}` now work as expected. [#324](https://github.com/sparklemotion/nokogiri/issues/324) +* `{XML,HTML}::DocumentFragment.{new,parse}` no longer strip leading and trailing whitespace. [#319](https://github.com/sparklemotion/nokogiri/issues/319) +* `XML::Node#{add_child,add_previous_sibling,add_next_sibling,replace}` return a `NodeSet` when passed a string. * Unclosed tags parsed more robustly in fragments. [#315](https://github.com/sparklemotion/nokogiri/issues/315) -* XML::Node#{replace,add_previous_sibling,add_next_sibling} edge cases fixed related to libxml's text node merging. [#308](https://github.com/sparklemotion/nokogiri/issues/308) +* `XML::Node#{replace,add_previous_sibling,add_next_sibling}` edge cases fixed related to libxml's text node merging. [#308](https://github.com/sparklemotion/nokogiri/issues/308) * Fixed a segfault when GC occurs during xpath handler argument marshalling. [#345](https://github.com/sparklemotion/nokogiri/issues/345) -* Added hack to Slop decorator to work with previously defined methods. [#330](https://github.com/sparklemotion/nokogiri/issues/330) +* Added hack to `Slop` decorator to work with previously defined methods. [#330](https://github.com/sparklemotion/nokogiri/issues/330) * Fix a memory leak when duplicating child nodes. [#353](https://github.com/sparklemotion/nokogiri/issues/353) -* Fixed off-by-one bug with nth-last-{child,of-type} CSS selectors when NOT using an+b notation. [#354](https://github.com/sparklemotion/nokogiri/issues/354) -* Fixed passing of non-namespace attributes to SAX::Document#start_element. 
[#356](https://github.com/sparklemotion/nokogiri/issues/356) +* Fixed off-by-one bug with `nth-last-{child,of-type}` CSS selectors when NOT using `an+b` notation. [#354](https://github.com/sparklemotion/nokogiri/issues/354) +* Fixed passing of non-namespace attributes to `SAX::Document#start_element`. [#356](https://github.com/sparklemotion/nokogiri/issues/356) * Workaround for libxml2 in-context parsing bug. [#362](https://github.com/sparklemotion/nokogiri/issues/362) -* Fixed NodeSet#wrap on nodes within a fragment. [#331](https://github.com/sparklemotion/nokogiri/issues/331) +* Fixed `NodeSet#wrap` on nodes within a fragment. [#331](https://github.com/sparklemotion/nokogiri/issues/331) ## 1.4.3 / 2010-07-28 ### Added -* XML::Reader#empty_element? returns true for empty elements. [#262](https://github.com/sparklemotion/nokogiri/issues/262) -* Node#remove_namespaces! now removes namespace *declarations* as well. [#294](https://github.com/sparklemotion/nokogiri/issues/294) -* NodeSet#at_xpath, NodeSet#at_css and NodeSet#> do what the corresponding methods of Node do. +* `XML::Reader#empty_element?` returns true for empty elements. [#262](https://github.com/sparklemotion/nokogiri/issues/262) +* `Node#remove_namespaces!` now removes namespace *declarations* as well. [#294](https://github.com/sparklemotion/nokogiri/issues/294) +* `NodeSet#at_xpath`, `NodeSet#at_css` and `NodeSet#>` do what the corresponding methods of `Node` do. ### Fixed -* XML::NodeSet#{include?,delete,push} accept an XML::Namespace -* XML::Document#parse added for parsing in the context of a document -* XML::DocumentFragment#inner_html= works with contextual parsing! 
[#298](https://github.com/sparklemotion/nokogiri/issues/298), [#281](https://github.com/sparklemotion/nokogiri/issues/281) -* lib/nokogiri/css/parser.y Combined CSS functions + pseudo selectors fixed +* `XML::NodeSet#{include?,delete,push}` accept an `XML::Namespace` +* `XML::Document#parse` added for parsing in the context of a document +* `XML::DocumentFragment#inner_html=` works with contextual parsing! [#298](https://github.com/sparklemotion/nokogiri/issues/298), [#281](https://github.com/sparklemotion/nokogiri/issues/281) +* `lib/nokogiri/css/parser.y` Combined CSS functions + pseudo selectors fixed * Reparenting text nodes is safe, even when the operation frees adjacent merged nodes. [#283](https://github.com/sparklemotion/nokogiri/issues/283) -* Fixed libxml2 versionitis issue with xmlFirstElementChild et al. [#303](https://github.com/sparklemotion/nokogiri/issues/303) -* XML::Attr#add_namespace now works as expected. [#252](https://github.com/sparklemotion/nokogiri/issues/252) -* HTML::DocumentFragment uses the string's encoding. [#305](https://github.com/sparklemotion/nokogiri/issues/305) +* Fixed libxml2 versionitis issue with `xmlFirstElementChild` et al. [#303](https://github.com/sparklemotion/nokogiri/issues/303) +* `XML::Attr#add_namespace` now works as expected. [#252](https://github.com/sparklemotion/nokogiri/issues/252) +* `HTML::DocumentFragment` uses the string's encoding. [#305](https://github.com/sparklemotion/nokogiri/issues/305) * Fix the CSS3 selector translation rule for the general sibling combinator (a.k.a. preceding selector) that incorrectly converted "E ~ F G" to "//F//G[preceding-sibling::E]". @@ -1264,32 +1650,32 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631] ### Added -* XML::Node#parse will parse XML or HTML fragments with respect to the context node. 
-* XML::Node#namespaces returns all namespaces defined in the node and all ancestor nodes (previously did not return ancestors' namespace definitions). -* Added Enumerable to XML::Node -* Nokogiri::XML::Schema#validate now uses xmlSchemaValidateFile if a filename is passed, which is faster and more memory-efficient. GH [#219](https://github.com/sparklemotion/nokogiri/issues/219) -* XML::Document#create_entity will create new EntityDecl objects. GH [#174](https://github.com/sparklemotion/nokogiri/issues/174) -* JRuby FFI implementation no longer uses ObjectSpace._id2ref, instead using Charles Nutter's rocking Weakling gem. -* Nokogiri::XML::Node#first_element_child fetch the first child node that is an ELEMENT node. -* Nokogiri::XML::Node#last_element_child fetch the last child node that is an ELEMENT node. -* Nokogiri::XML::Node#elements fetch all children nodes that are ELEMENT nodes. -* Nokogiri::XML::Node#add_child, #add_previous_sibling, #before, #add_next_sibling, #after, #inner_html, #swap and #replace all now accept a Node, DocumentFragment, NodeSet, or a string containing markup. -* Node#fragment? indicates whether a node is a DocumentFragment. - -### Fixed - -* XML::NodeSet is now always decorated (if the document has decorators). GH [#198](https://github.com/sparklemotion/nokogiri/issues/198) -* XML::NodeSet#slice gracefully handles offset+length larger than the set length. GH [#200](https://github.com/sparklemotion/nokogiri/issues/200) -* XML::Node#content= safely unlinks previous content. GH [#203](https://github.com/sparklemotion/nokogiri/issues/203) -* XML::Node#namespace= takes nil as a parameter -* XML::Node#xpath returns things other than NodeSet objects. GH [#208](https://github.com/sparklemotion/nokogiri/issues/208) -* XSLT::StyleSheet#transform accepts hashes for parameters. GH [#223](https://github.com/sparklemotion/nokogiri/issues/223) -* Psuedo selectors inside not() work. 
GH [#205](https://github.com/sparklemotion/nokogiri/issues/205) -* XML::Builder doesn't break when nodes are unlinked. Thanks to vihai! GH [#228](https://github.com/sparklemotion/nokogiri/issues/228) +* `XML::Node#parse` will parse XML or HTML fragments with respect to the context node. +* `XML::Node#namespaces` returns all namespaces defined in the node and all ancestor nodes (previously did not return ancestors' namespace definitions). +* Added `Enumerable` to `XML::Node` +* `Nokogiri::XML::Schema#validate` now uses xmlSchemaValidateFile if a filename is passed, which is faster and more memory-efficient. GH [#219](https://github.com/sparklemotion/nokogiri/issues/219) +* `XML::Document#create_entity` will create new `EntityDecl` objects. GH [#174](https://github.com/sparklemotion/nokogiri/issues/174) +* JRuby FFI implementation no longer uses `ObjectSpace._id2ref`, instead using Charles Nutter's rocking Weakling gem. +* `Nokogiri::XML::Node#first_element_child` fetch the first child node that is an ELEMENT node. +* `Nokogiri::XML::Node#last_element_child` fetch the last child node that is an ELEMENT node. +* `Nokogiri::XML::Node#elements` fetch all children nodes that are ELEMENT nodes. +* `Nokogiri::XML::Node#add_child`, `#add_previous_sibling`, `#before`, `#add_next_sibling`, `#after`, `#inner_html`, `#swap` and `#replace` all now accept a `Node`, `DocumentFragment`, `NodeSet`, or a string containing markup. +* `Node#fragment?` indicates whether a node is a `DocumentFragment`. + +### Fixed + +* `XML::NodeSet` is now always decorated (if the document has decorators). GH [#198](https://github.com/sparklemotion/nokogiri/issues/198) +* `XML::NodeSet#slice` gracefully handles offset+length larger than the set length. GH [#200](https://github.com/sparklemotion/nokogiri/issues/200) +* `XML::Node#content=` safely unlinks previous content. 
GH [#203](https://github.com/sparklemotion/nokogiri/issues/203) +* `XML::Node#namespace=` takes nil as a parameter +* `XML::Node#xpath` returns things other than `NodeSet` objects. GH [#208](https://github.com/sparklemotion/nokogiri/issues/208) +* `XSLT::StyleSheet#transform` accepts hashes for parameters. GH [#223](https://github.com/sparklemotion/nokogiri/issues/223) +* Pseudo selectors inside `not()` work. GH [#205](https://github.com/sparklemotion/nokogiri/issues/205) +* `XML::Builder` doesn't break when nodes are unlinked. Thanks to vihai! GH [#228](https://github.com/sparklemotion/nokogiri/issues/228) * Encoding can be forced on the SAX parser. Thanks Eugene Pimenov! GH [#204](https://github.com/sparklemotion/nokogiri/issues/204) -* XML::DocumentFragment uses XML::Node#parse to determine children. +* `XML::DocumentFragment` uses `XML::Node#parse` to determine children. * Fixed a memory leak in xml reader. Thanks sdor! GH [#244](https://github.com/sparklemotion/nokogiri/issues/244) -* Node#replace returns the new child node as claimed in the RDoc. Previously returned +self+. +* `Node#replace` returns the new child node as claimed in the RDoc. Previously returned +self+. ### Notes @@ -1300,30 +1686,30 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631] ### Added -* Added Nokogiri::LIBXML_ICONV_ENABLED -* Alias Node#[] to Node#attr -* XML::Node#next_element added -* XML::Node#> added for searching a nodes immediate children -* XML::NodeSet#reverse added -* Added fragment support to Node#add_child, Node#add_next_sibling, Node#add_previous_sibling, and Node#replace. -* XML::Node#previous_element implemented +* Added `Nokogiri::LIBXML_ICONV_ENABLED` +* Alias `Node#[]` to `Node#attr` +* `XML::Node#next_element` added +* `XML::Node#>` added for searching a node's immediate children +* `XML::NodeSet#reverse` added +* Added fragment support to `Node#add_child`, `Node#add_next_sibling`, `Node#add_previous_sibling`, and `Node#replace`. 
+* `XML::Node#previous_element` implemented * Rubinius support -* Ths CSS selector engine now supports :has() -* XML::NodeSet#filter() was added -* XML::Node.next= and .previous= are aliases for add_next_sibling and add_previous_sibling. GH [#183](https://github.com/sparklemotion/nokogiri/issues/183) +* The CSS selector engine now supports `:has()` +* `XML::NodeSet#filter()` was added +* `XML::Node.next=` and `.previous=` are aliases for `add_next_sibling` and `add_previous_sibling`. GH [#183](https://github.com/sparklemotion/nokogiri/issues/183) ### Fixed * XML fragments with namespaces do not raise an exception (regression in 1.4.0) -* Node#matches? works in nodes contained by a DocumentFragment. GH [#158](https://github.com/sparklemotion/nokogiri/issues/158) -* Document should not define add_namespace() method. GH [#169](https://github.com/sparklemotion/nokogiri/issues/169) -* XPath queries returning namespace declarations do not segfault. -* Node#replace works with nodes from different documents. GH [#162](https://github.com/sparklemotion/nokogiri/issues/162) -* Adding XML::Document#collect_namespaces +* `Node#matches?` works in nodes contained by a `DocumentFragment`. GH [#158](https://github.com/sparklemotion/nokogiri/issues/158) +* `Document` should not define `add_namespace()` method. GH [#169](https://github.com/sparklemotion/nokogiri/issues/169) +* `XPath` queries returning namespace declarations do not segfault. +* `Node#replace` works with nodes from different documents. GH [#162](https://github.com/sparklemotion/nokogiri/issues/162) +* Adding `XML::Document#collect_namespaces` * Fixed bugs in the SOAP4R adapter -* Fixed bug in XML::Node#next_element for certain edge cases +* Fixed bug in `XML::Node#next_element` for certain edge cases * Fixed load path issue with JRuby under Windows. GH [#160](https://github.com/sparklemotion/nokogiri/issues/160). -* XSLT#apply_to will honor the "output method". Thanks richardlehane! 
+* `XSLT#apply_to` will honor the "output method". Thanks richardlehane! * Fragments containing leading text nodes with newlines now parse properly. GH [#178](https://github.com/sparklemotion/nokogiri/issues/178). @@ -1331,35 +1717,35 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631] ### Added -* Node#at_xpath returns the first element of the NodeSet matching the XPath expression. -* Node#at_css returns the first element of the NodeSet matching the CSS selector. -* NodeSet#| for unions GH [#119](https://github.com/sparklemotion/nokogiri/issues/119) (Thanks Serabe!) -* NodeSet#inspect makes prettier output -* Node#inspect implemented for more rubyish document inspecting -* Added XML::DTD#external_id -* Added XML::DTD#system_id -* Added XML::ElementContent for DTD Element content validity -* Better namespace declaration support in Nokogiri::XML::Builder -* Added XML::Node#external_subset -* Added XML::Node#create_external_subset -* Added XML::Node#create_internal_subset +* `Node#at_xpath` returns the first element of the `NodeSet` matching the XPath expression. +* `Node#at_css` returns the first element of the `NodeSet` matching the CSS selector. +* `NodeSet#|` for unions GH [#119](https://github.com/sparklemotion/nokogiri/issues/119) (Thanks Serabe!) +* `NodeSet#inspect` makes prettier output +* `Node#inspect` implemented for more rubyish document inspecting +* Added `XML::DTD#external_id` +* Added `XML::DTD#system_id` +* Added `XML::ElementContent` for DTD Element content validity +* Better namespace declaration support in `Nokogiri::XML::Builder` +* Added `XML::Node#external_subset` +* Added `XML::Node#create_external_subset` +* Added `XML::Node#create_internal_subset` * XML Builder can append raw strings (GH [#141](https://github.com/sparklemotion/nokogiri/issues/141), patch from dudleyf) -* XML::SAX::ParserContext added -* XML::Document#remove_namespaces! 
for the namespace-impaired +* `XML::SAX::ParserContext` added +* `XML::Document#remove_namespaces!` for the namespace-impaired ### Fixed * returns nil when HTML documents do not declare a meta encoding tag. GH [#115](https://github.com/sparklemotion/nokogiri/issues/115) -* Uses RbConfig::CONFIG['host_os'] to adjust ENV['PATH'] GH [#113](https://github.com/sparklemotion/nokogiri/issues/113) -* NodeSet#search is more efficient GH [#119](https://github.com/sparklemotion/nokogiri/issues/119) (Thanks Serabe!) -* NodeSet#xpath handles custom xpath functions -* Fixing a SEGV when XML::Reader gets attributes for current node -* Node#inner_html takes the same arguments as Node#to_html GH [#117](https://github.com/sparklemotion/nokogiri/issues/117) -* DocumentFragment#css delegates to it's child nodes GH [#123](https://github.com/sparklemotion/nokogiri/issues/123) -* NodeSet#[] works with slices larger than NodeSet#length GH [#131](https://github.com/sparklemotion/nokogiri/issues/131) +* Uses `RbConfig::CONFIG['host_os']` to adjust `ENV['PATH']` GH [#113](https://github.com/sparklemotion/nokogiri/issues/113) +* `NodeSet#search` is more efficient GH [#119](https://github.com/sparklemotion/nokogiri/issues/119) (Thanks Serabe!) +* `NodeSet#xpath` handles custom xpath functions +* Fixing a SEGV when `XML::Reader` gets attributes for current node +* `Node#inner_html` takes the same arguments as `Node#to_html` GH [#117](https://github.com/sparklemotion/nokogiri/issues/117) +* `DocumentFragment#css` delegates to its child nodes GH [#123](https://github.com/sparklemotion/nokogiri/issues/123) +* `NodeSet#[]` works with slices larger than `NodeSet#length` GH [#131](https://github.com/sparklemotion/nokogiri/issues/131) * Reparented nodes maintain their namespace. GH [#134](https://github.com/sparklemotion/nokogiri/issues/134) -* Fixed SEGV when adding an XML::Document to NodeSet -* XML::SyntaxError can be duplicated.
GH [#148](https://github.com/sparklemotion/nokogiri/issues/148) +* Fixed SEGV when adding an `XML::Document` to `NodeSet` +* `XML::SyntaxError` can be duplicated. GH [#148](https://github.com/sparklemotion/nokogiri/issues/148) ### Removed @@ -1370,13 +1756,13 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631] ### Added -* NodeSet#children returns all children of all nodes +* `NodeSet#children` returns all children of all nodes ### Fixed * Override libxml-ruby's global error handler -* ParseOption#strict fixed -* Fixed a segfault when sending an empty string to Node#inner_html= GH [#88](https://github.com/sparklemotion/nokogiri/issues/88) +* `ParseOption#strict` fixed +* Fixed a segfault when sending an empty string to `Node#inner_html=` GH [#88](https://github.com/sparklemotion/nokogiri/issues/88) * String encoding is now set to UTF-8 in Ruby 1.9 * Fixed a segfault when moving root nodes between documents. GH [#91](https://github.com/sparklemotion/nokogiri/issues/91) * Fixed an O(n) penalty on node creation. GH [#101](https://github.com/sparklemotion/nokogiri/issues/101) @@ -1391,29 +1777,29 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631] ### Added -* Nokogiri::XML::DTD#validate will validate your document +* `Nokogiri::XML::DTD#validate` will validate your document ### Fixed -* Nokogiri::XML::NodeSet#search will search top level nodes. GH [#73](https://github.com/sparklemotion/nokogiri/issues/73) -* Removed namespace related methods from Nokogiri::XML::Document +* `Nokogiri::XML::NodeSet#search` will search top level nodes. 
GH [#73](https://github.com/sparklemotion/nokogiri/issues/73) +* Removed namespace related methods from `Nokogiri::XML::Document` * Fixed a segfault when a namespace was added twice * Made nokogiri work with Snow Leopard GH [#79](https://github.com/sparklemotion/nokogiri/issues/79) * Mailing list has moved to: http://groups.google.com/group/nokogiri-talk * HTML fragments now correctly handle comments and CDATA blocks. GH [#78](https://github.com/sparklemotion/nokogiri/issues/78) -* Nokogiri::XML::Document#clone is now an alias of dup +* `Nokogiri::XML::Document#clone` is now an alias of dup ### Deprecations -* Nokogiri::XML::SAX::Document#start_element_ns is deprecated, please switch to Nokogiri::XML::SAX::Document#start_element_namespace -* Nokogiri::XML::SAX::Document#end_element_ns is deprecated, please switch to Nokogiri::XML::SAX::Document#end_element_namespace +* `Nokogiri::XML::SAX::Document#start_element_ns` is deprecated, please switch to `Nokogiri::XML::SAX::Document#start_element_namespace` +* `Nokogiri::XML::SAX::Document#end_element_ns` is deprecated, please switch to `Nokogiri::XML::SAX::Document#end_element_namespace` ## 1.3.1 / 2009-06-07 ### Fixed -* extconf.rb checks for optional RelaxNG and Schema functions +* `extconf.rb` checks for optional RelaxNG and Schema functions * Namespace nodes are added to the Document node cache @@ -1423,24 +1809,24 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631] * Builder changes scope based on block arity * Builder supports methods ending in underscore similar to tagz -* Nokogiri::XML::Node#<=> compares nodes based on Document position -* Nokogiri::XML::Node#matches? returns true if Node can be found with given selector. 
-* Nokogiri::XML::Node#ancestors now returns an Nokogiri::XML::NodeSet -* Nokogiri::XML::Node#ancestors will match parents against optional selector -* Nokogiri::HTML::Document#meta_encoding for getting the meta encoding -* Nokogiri::HTML::Document#meta_encoding= for setting the meta encoding -* Nokogiri::XML::Document#encoding= to set the document encoding -* Nokogiri::XML::Schema for validating documents against XSD schema -* Nokogiri::XML::RelaxNG for validating documents against RelaxNG schema -* Nokogiri::HTML::ElementDescription for fetching HTML element descriptions -* Nokogiri::XML::Node#description to fetch the node description -* Nokogiri::XML::Node#accept implements Visitor pattern -* bin/nokogiri for easily examining documents (Thanks Yutaka HARA!) -* Nokogiri::XML::NodeSet now supports more Array and Enumerable operators: index, delete, slice, - (difference), + (concatenation), & (intersection), push, pop, shift, == -* Nokogiri.XML, Nokogiri.HTML take blocks that receive Nokogiri::XML::ParseOptions objects -* Nokogiri::XML::Node#namespace returns a Nokogiri::XML::Namespace -* Nokogiri::XML::Node#namespace= for setting a node's namespace -* Nokogiri::XML::DocumentFragment and Nokogiri::HTML::DocumentFragment have a sensible API and a more robust implementation. +* `Nokogiri::XML::Node#<=>` compares nodes based on Document position +* `Nokogiri::XML::Node#matches?` returns true if Node can be found with given selector. 
+* `Nokogiri::XML::Node#ancestors` now returns a `Nokogiri::XML::NodeSet` +* `Nokogiri::XML::Node#ancestors` will match parents against optional selector +* `Nokogiri::HTML::Document#meta_encoding` for getting the meta encoding +* `Nokogiri::HTML::Document#meta_encoding=` for setting the meta encoding +* `Nokogiri::XML::Document#encoding=` to set the document encoding +* `Nokogiri::XML::Schema` for validating documents against XSD schema +* `Nokogiri::XML::RelaxNG` for validating documents against RelaxNG schema +* `Nokogiri::HTML::ElementDescription` for fetching HTML element descriptions +* `Nokogiri::XML::Node#description` to fetch the node description +* `Nokogiri::XML::Node#accept` implements Visitor pattern +* `bin/nokogiri` for easily examining documents (Thanks Yutaka HARA!) +* `Nokogiri::XML::NodeSet` now supports more Array and Enumerable operators: index, delete, slice, - (difference), + (concatenation), & (intersection), push, pop, shift, == +* `Nokogiri.XML`, `Nokogiri.HTML` take blocks that receive `Nokogiri::XML::ParseOptions` objects +* `Nokogiri::XML::Node#namespace` returns a `Nokogiri::XML::Namespace` +* `Nokogiri::XML::Node#namespace=` for setting a node's namespace +* `Nokogiri::XML::DocumentFragment` and `Nokogiri::HTML::DocumentFragment` have a sensible API and a more robust implementation. * JRuby 1.3.0 support via FFI. ### Fixed @@ -1452,17 +1838,17 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631] * Fixed bug with unlinking a node then reparenting it. (GH[#22](https://github.com/sparklemotion/nokogiri/issues/22)) * Fixed failure to catch errors during XSLT parsing (GH[#32](https://github.com/sparklemotion/nokogiri/issues/32)) * Fixed a bug with attribute conditions in CSS selectors (GH[#36](https://github.com/sparklemotion/nokogiri/issues/36)) -* Fixed intolerance of HTML attributes without values in Node#before/after/inner_html=.
(GH[#35](https://github.com/sparklemotion/nokogiri/issues/35)) +* Fixed intolerance of HTML attributes without values in `Node#{before/after/inner_html=}`. (GH[#35](https://github.com/sparklemotion/nokogiri/issues/35)) ## 1.2.3 / 2009-03-22 ### Fixed -* Fixing bug where a node is passed in to Node#new +* Fixing bug where a node is passed in to `Node#new` * Namespace should be assigned on DocumentFragment creation. LH [#66](https://github.com/sparklemotion/nokogiri/issues/66) -* Nokogiri::XML::NodeSet#dup works GH [#10](https://github.com/sparklemotion/nokogiri/issues/10) -* Nokogiri::HTML returns an empty Document when given a blank string GH[#11](https://github.com/sparklemotion/nokogiri/issues/11) +* `Nokogiri::XML::NodeSet#dup` works GH [#10](https://github.com/sparklemotion/nokogiri/issues/10) +* `Nokogiri::HTML` returns an empty Document when given a blank string GH[#11](https://github.com/sparklemotion/nokogiri/issues/11) * Adding a child will remove duplicate namespace declarations LH [#67](https://github.com/sparklemotion/nokogiri/issues/67) * Builder methods take a hash as a second argument @@ -1471,27 +1857,27 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631] ### Added -* Nokogiri may be used with soap4r. See XSD::XMLParser::Nokogiri -* Nokogiri::XML::Node#inner_html= to set the inner html for a node +* Nokogiri may be used with soap4r. 
See `XSD::XMLParser::Nokogiri` +* `Nokogiri::XML::Node#inner_html=` to set the inner html for a node * Nokogiri builder interface improvements -* Nokogiri::XML::Node#swap swaps html for current node (LH [#50](https://github.com/sparklemotion/nokogiri/issues/50)) +* `Nokogiri::XML::Node#swap` swaps html for current node (LH [#50](https://github.com/sparklemotion/nokogiri/issues/50)) ### Fixed * Fixed a tag nesting problem in the Builder API (LH [#41](https://github.com/sparklemotion/nokogiri/issues/41)) -* Nokogiri::HTML.fragment will properly handle text only nodes (LH [#43](https://github.com/sparklemotion/nokogiri/issues/43)) -* Nokogiri::XML::Node#before will prepend text nodes (LH [#44](https://github.com/sparklemotion/nokogiri/issues/44)) -* Nokogiri::XML::Node#after will append text nodes -* Nokogiri::XML::Node#search automatically registers root namespaces (LH [#42](https://github.com/sparklemotion/nokogiri/issues/42)) -* Nokogiri::XML::NodeSet#search automatically registers namespaces -* Nokogiri::HTML::NamedCharacters delegates to libxml2 -* Nokogiri::XML::Node#[] can take a symbol (LH [#48](https://github.com/sparklemotion/nokogiri/issues/48)) +* `Nokogiri::HTML.fragment` will properly handle text only nodes (LH [#43](https://github.com/sparklemotion/nokogiri/issues/43)) +* `Nokogiri::XML::Node#before` will prepend text nodes (LH [#44](https://github.com/sparklemotion/nokogiri/issues/44)) +* `Nokogiri::XML::Node#after` will append text nodes +* `Nokogiri::XML::Node#search` automatically registers root namespaces (LH [#42](https://github.com/sparklemotion/nokogiri/issues/42)) +* `Nokogiri::XML::NodeSet#search` automatically registers namespaces +* `Nokogiri::HTML::NamedCharacters` delegates to libxml2 +* `Nokogiri::XML::Node#[]` can take a symbol (LH [#48](https://github.com/sparklemotion/nokogiri/issues/48)) * vasprintf for windows updated. Thanks Geoffroy Couprie! 
-* Nokogiri::XML::Node#[]= should not encode entities (LH [#55](https://github.com/sparklemotion/nokogiri/issues/55)) +* `Nokogiri::XML::Node#[]=` should not encode entities (LH [#55](https://github.com/sparklemotion/nokogiri/issues/55)) * Namespaces should be copied to reparented nodes (LH [#56](https://github.com/sparklemotion/nokogiri/issues/56)) * Nokogiri uses encoding set on the string for default in Ruby 1.9 -* Document#dup should create a new document of the same type (LH [#59](https://github.com/sparklemotion/nokogiri/issues/59)) -* Document should not have a parent method (LH [#64](https://github.com/sparklemotion/nokogiri/issues/64)) +* `Document#dup` should create a new document of the same type (LH [#59](https://github.com/sparklemotion/nokogiri/issues/59)) +* `Document` should not have a parent method (LH [#64](https://github.com/sparklemotion/nokogiri/issues/64)) ## 1.2.1 / 2009-02-23 @@ -1509,69 +1895,69 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631] * CSS search now supports CSS3 namespace queries * Namespaces on the root node are automatically registered * CSS queries use the default namespace -* Nokogiri::XML::Document#encoding get encoding used for this document -* Nokogiri::XML::Document#url get the document url -* Nokogiri::XML::Node#add_namespace add a namespace to the node LH[#38](https://github.com/sparklemotion/nokogiri/issues/38) -* Nokogiri::XML::Node#each iterate over attribute name, value pairs -* Nokogiri::XML::Node#keys get all attribute names -* Nokogiri::XML::Node#line get the line number for a node (Thanks Dirkjan Bussink!) 
-* Nokogiri::XML::Node#serialize now takes an optional encoding parameter -* Nokogiri::XML::Node#to_html, to_xml, and to_xhtml take an optional encoding -* Nokogiri::XML::Node#to_str -* Nokogiri::XML::Node#to_xhtml to produce XHTML documents -* Nokogiri::XML::Node#values get all attribute values -* Nokogiri::XML::Node#write_to writes the node to an IO object with optional encoding -* Nokogiri::XML::ProcessingInstrunction.new -* Nokogiri::XML::SAX::PushParser for all your push parsing needs. - -### Fixed - -* Fixed Nokogiri::XML::Document#dup +* `Nokogiri::XML::Document#encoding` get encoding used for this document +* `Nokogiri::XML::Document#url` get the document url +* `Nokogiri::XML::Node#add_namespace` add a namespace to the node LH[#38](https://github.com/sparklemotion/nokogiri/issues/38) +* `Nokogiri::XML::Node#each` iterate over attribute name, value pairs +* `Nokogiri::XML::Node#keys` get all attribute names +* `Nokogiri::XML::Node#line` get the line number for a node (Thanks Dirkjan Bussink!) +* `Nokogiri::XML::Node#serialize` now takes an optional encoding parameter +* `Nokogiri::XML::Node#to_html`, to_xml, and to_xhtml take an optional encoding +* `Nokogiri::XML::Node#to_str` +* `Nokogiri::XML::Node#to_xhtml` to produce XHTML documents +* `Nokogiri::XML::Node#values` get all attribute values +* `Nokogiri::XML::Node#write_to` writes the node to an IO object with optional encoding +* `Nokogiri::XML::ProcessingInstruction.new` +* `Nokogiri::XML::SAX::PushParser` for all your push parsing needs. + +### Fixed + +* Fixed `Nokogiri::XML::Document#dup` * Fixed header detection. Thanks rubikitch! 
* Fixed a problem where invalid CSS would cause the parser to hang ### Deprecations -* Nokogiri::XML::Node.new_from_str will be deprecated in 1.3.0 +* `Nokogiri::XML::Node.new_from_str` will be deprecated in 1.3.0 ### Changed -* Nokogiri::HTML.fragment now returns an XML::DocumentFragment (LH [#32](https://github.com/sparklemotion/nokogiri/issues/32)) +* `Nokogiri::HTML.fragment` now returns an XML::DocumentFragment (LH [#32](https://github.com/sparklemotion/nokogiri/issues/32)) ## 1.1.1 ### Added -* Added XML::Node#elem? -* Added XML::Node#attribute_nodes -* Added XML::Attr -* XML::Node#delete added. -* XML::NodeSet#inner_html added. +* Added `XML::Node#elem?` +* Added `XML::Node#attribute_nodes` +* Added `XML::Attr` +* `XML::Node#delete` added. +* `XML::NodeSet#inner_html` added. ### Fixed * Not including an HTML entity for \r for HTML nodes. -* Removed CSS::SelectorHandler and XML::XPathHandler -* XML::Node#attributes returns an Attr node for the value. -* XML::NodeSet implements to_xml +* Removed `CSS::SelectorHandler` and `XML::XPathHandler` +* `XML::Node#attributes` returns an `Attr` node for the value. +* `XML::NodeSet` implements `to_xml` ## 1.1.0 ### Added -* Custom XPath functions are now supported. See Nokogiri::XML::Node#xpath -* Custom CSS pseudo classes are now supported. See Nokogiri::XML::Node#css -* Nokogiri::XML::Node#<< will add a child to the current node +* Custom XPath functions are now supported. See `Nokogiri::XML::Node#xpath` +* Custom CSS pseudo classes are now supported. See `Nokogiri::XML::Node#css` +* `Nokogiri::XML::Node#<<` will add a child to the current node ### Fixed * Mutex lock on CSS cache access * Fixed build problems with GCC 3.3.5 -* XML::Node#to_xml now takes an indentation argument -* XML::Node#dup takes an optional depth argument -* XML::Node#add_previous_sibling returns new sibling node. 
+* `XML::Node#to_xml` now takes an indentation argument +* `XML::Node#dup` takes an optional depth argument +* `XML::Node#add_previous_sibling` returns new sibling node. ## 1.0.7 @@ -1581,8 +1967,8 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631] * Fixed memory leak when using Dike * SAX parser now parses IO streams * Comment nodes have their own class -* Nokogiri() should delegate to Nokogiri.parse() -* Prepending rather than appending to ENV['PATH'] on windows +* `Nokogiri()` should delegate to `Nokogiri.parse()` +* Prepending rather than appending to `ENV['PATH']` on windows * Fixed a bug in complex CSS negation selectors @@ -1590,10 +1976,10 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631] ### Fixed -* XPath Parser raises a SyntaxError on parse failure -* CSS Parser raises a SyntaxError on parse failure -* filter() and not() hpricot compatibility added -* CSS searches via Node#search are now always relative +* XPath Parser raises a `SyntaxError` on parse failure +* CSS Parser raises a `SyntaxError` on parse failure +* `filter()` and `not()` hpricot compatibility added +* CSS searches via `Node#search` are now always relative * CSS to XPath conversion is now cached @@ -1601,9 +1987,9 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. [#631] ### Fixed -* Added mailing list and ticket tracking information to the README.txt -* Sets ENV['PATH'] on windows if it doesn't exist -* Caching results of NodeSet#[] on Document +* Added mailing list and ticket tracking information to the `README.txt` +* Sets `ENV['PATH']` on windows if it doesn't exist +* Caching results of `NodeSet#[]` on `Document` ## 1.0.4 @@ -1619,25 +2005,25 @@ Repackaging of 1.5.1 with a gemspec that is compatible with older Rubies. 
[#631] ### Fixed -* NodeSet now implements to_ary -* XML::Document should not implement parent +* `NodeSet` now implements `to_ary` +* `XML::Document` should not implement parent * More GC Bugs fixed. (Mike is AWESOME!) * Removed RARRAY_LEN for 1.8.5 compatibility. Thanks Shane Hanna. -* inner_html fixed. (Thanks Yehuda!) +* `inner_html` fixed. (Thanks Yehuda!) ## 1.0.2 ### Fixed -* extconf.rb should not check for frex and racc +* `extconf.rb` should not check for frex and racc ## 1.0.1 ### Fixed -* Made sure extconf.rb searched libdir and prefix so that ports libxml/ruby will link properly. Thanks lucsky! +* Made sure `extconf.rb` searched libdir and prefix so that ports libxml/ruby will link properly. Thanks lucsky! ## 1.0.0 / 2008-07-13 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5da58fec94..6292393f5c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -23,6 +23,7 @@ If you're looking for guidance on filing a bug report or getting support, please - [Building gems](#building-gems) - [Other utilities](#other-utilities) - [Rake tasks](#rake-tasks) +- [Making a release](#making-a-release) - [Code of Conduct](#code-of-conduct) @@ -57,7 +58,7 @@ Nokogiri is widely used in the Ruby ecosystem, and so extra care should be taken Please take a look at our [Issues marked "Help Wanted"](https://github.com/sparklemotion/nokogiri/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22). -Also, pull requests for documentation improvements are always welcome! +Also, [pull requests for documentation improvements are always welcome](#documentation)! ## Submitting Pull Requests @@ -78,7 +79,7 @@ Clone https://github.com/sparklemotion/nokogiri and run `bundle install`. ### Advanced -Please install the latest or previous version of CRuby (e.g., 3.0 or 2.7 as of 2021-02), and a recent version of JRuby. We recommend using a Ruby manager like `rvm` or `chruby` to make it easy to switch. 
+Please install the latest or previous version of CRuby (e.g., 3.1 or 3.0 as of 2022-01), and a recent version of JRuby. We recommend using `rbenv`, which is used in test scripts when necessary to test gems against multiple rubies. Please install a system version of libxml2/libxslt (see [Installing Nokogiri](https://nokogiri.org/tutorials/installing_nokogiri.html#installing-using-standard-system-libraries) for details) so that you can test against both the packaged libraries and your system libraries. @@ -134,10 +135,14 @@ NOKOGIRI_TEST_GC_LEVEL=normal bundle exec rake compile test # minor GC after each test NOKOGIRI_TEST_GC_LEVEL=minor bundle exec rake compile test -# major GC and a compaction after each test +# major GC after each test +NOKOGIRI_TEST_GC_LEVEL=major bundle exec rake compile test + +# major GC after each test and GC compaction after every 20 tests NOKOGIRI_TEST_GC_LEVEL=compact bundle exec rake compile test -# verify references after compaction after every test +# verify references after compaction after every 20 tests +# (see https://alanwu.space/post/check-compaction/) NOKOGIRI_TEST_GC_LEVEL=verify bundle exec rake compile test # run with GC "stress mode" on @@ -152,45 +157,83 @@ If you want to build Nokogiri against a modified version of libxml2, clone libxm That script also takes an optional command to run with the proper environment variables set to use the local libxml2 library, which can be useful when trying to `git bisect` against libxml2. 
+### gumbo HTML5 parser + +To run the test suite for the gumbo parser: + +``` sh +bundle exec rake gumbo +``` + +To make sure to run additional html5lib tests for Nokogiri's HTML5 parser: + +``` sh +git submodule update --init # test/html5lib-tests +bundle exec rake compile test +``` + ## Style Guide -I don't feel very strongly about code style, but when possible I follow [Shopify's Ruby Style Guide](https://shopify.github.io/ruby-style-guide/), and for C and Java code I use the `astyle` settings laid out in `/rakelib/format.rake`. +### Documentation -You can format the C and Java code with `rake format`. Maybe someday I'll auto-format Ruby, but not today. +We use `rdoc` to build Nokogiri's documentation. Run `rake rdoc` to build into the `./html` directory, and see the rdoc tasks in [rakelib/rdoc.rake](rakelib/rdoc.rake). -No, I don't want to talk to you about this. +Previously we made some effort to move towards `yard` but that work was stopped (and the decision record can be found at [RFC: convert to use `yard` for documentation](https://github.com/sparklemotion/nokogiri/issues/1996)). +I would prefer docstrings to be in `RDoc::Markup` format, though simple docstrings may be in Markdown (using `:markup: markdown`). + +If you submit pull requests that improve documentation, **I will happily merge them** and credit you in the CHANGELOG. + +Some guidelines (see [lib/nokogiri/xml/node.rb](lib/nokogiri/xml/node.rb) and [ext/nokogiri/xml/node.c](ext/nokogiri/xml/node.c) for examples): + +- use `:call-seq:` to: + - note the return type of the method whenever possible, e.g. 
`:call-seq: upcase(name) → String` + - to name all the aliases of a method + - to indicate block/yield usage of a method +- briefly explain the purpose of the method, what it returns, and what side effects it has +- use a `[Parameters]` definition to note the expected types of all the parameters as a bulleted list +- use a `[Returns]` definition to note the return type +- use a `[Yields]` definition to note the block parameters +- use a `⚠` character to warn the user about tricky usage +- use a `💡` character to call attention to important notes +- `See also:` should be used to call out related methods +- `Since` should be used to indicate the version that code was introduced +- prefer to show nuanced behavior in code examples, rather than by explaining it -## How Continuous Integration ("CI") is configured -This section could probably be an entire guide unto itself, so I'll try to be as brief as reasonable. +### Code -We currently have CI tests running in three places: +I don't feel very strongly about code style, but when possible I follow [Shopify's Ruby Style Guide](https://shopify.github.io/ruby-style-guide/), and for C and Java code I use the `astyle` settings laid out in `/rakelib/format.rake`. + +You can format the C, Java, and Ruby code with `rake format`. + +There are likely some pending Rubocop rules in `.rubocop_todo.yml` which I'd be happy to merge if you enabled them and submit a PR. + +No, I don't want to talk to you about any of the style choices. -- [Concourse](https://ci.nokogiri.org/?search=nokogiri): Linux, including many debugging and integration test -- [Github Actions](https://github.com/sparklemotion/nokogiri/actions/workflows/macos.yml): for MacOS only -- [Appveyor](https://ci.appveyor.com/project/flavorjones/nokogiri): for Windows only -This is ... not great. I'd love to set up everything to be in one place, but each has its advantages. 
It might be possible to move Windows testing to Github Actions, but honestly I'm kinda waiting for someone from the Ruby Windows community to figure that out. +## How Continuous Integration ("CI") is configured + +The bulk of CI is running in Github Actions since May 2021: https://github.com/sparklemotion/nokogiri/actions -I've set up "required" builds for the `main` branch in Github so that PRs can see and be bound by all these tests passing. +However, we also run tests against 32-bit windows (which aren't supported by GA as of this writing) in Appveyor: https://ci.appveyor.com/project/flavorjones/nokogiri -### Concourse +Please note that there are some known holes in CI coverage due to github actions limitations: -We run the bulk of our tests under Concourse. Concourse is great for me because +- installing ruby and native gems on 32-bit Linux, see: + - [actions/checkout error: /etc/*release "no such file or directory"](https://github.com/actions/checkout/issues/334) + - [actions/cache is not working as expected in 32-bit linux containers](https://github.com/actions/cache/issues/675) + - [actions/upload-artifact is not working as expected in 32-bit linux containers](https://github.com/actions/upload-artifact/issues/266) -- I can hijack a container if a test fails and poke around in it -- I can conditionally trigger the builds like a real pipeline -- I can run it locally on my dev machine -- I have complete control over the images used -The downside is, nobody in the Ruby community besides me and Dr. Nic know how to operate it or configure it. 
+### Coverage -In any case, the general pipeline we use is the same for `main` and for PRs includes: +The `ci.yml` pipeline includes jobs to: - basic security sanity check: run rubocop - fast feedback for obvious failures: run against system libraries on vanilla ubuntu +- run the gumbo parser tests on ubuntu, macos, and windows - run on all supported versions of CRuby: - once with packaged libraries - once with system libraries @@ -202,18 +245,23 @@ In any case, the general pipeline we use is the same for `main` and for PRs incl - run with libxml-ruby loaded (because this interacts with libxml2 in conflicting ways) - against system libraries - with valgrind using packaged libraries + +The `upstream.yml` pipeline includes jobs to: + +- run against CRuby head (linux, windows, macos) including valgrind +- run against JRuby head +- run against libxml2 and libxslt head (linux only today) including valgrind + +The `gem-install.yml` pipeline includes jobs to: + - build a "ruby" platform gem - - install and test on vanilla ubuntu - - install and test on musl -- build a native 64-bit linux gem - - install and test on vanilla ubuntu with all supported versions of CRuby - - install and test on musl -- build a native 32-bit linux gem - - install and test on vanilla ubuntu + - install and test on linux, macos, and windows +- build a native 64-bit gem (linux, macos, windows) + - install and test against all supported versions of CRuby - install and test on musl - build a jruby gem, install and test it -These pipelines are configured in `/concourse/nokogiri.yml` and `nokogiri-pr.yml`. Those files file are ... nontrivial, and I'm sorry about that. See https://github.com/flavorjones/concourse-gem for help. +The `truffle.yml` pipeline tests TruffleRuby nightlies with a few different compile-time flags. TruffleRuby support is still experimental due to Sulong limitations, and the test suite is exceedingly slow when run by TR, so this pipeline doesn't run on pushes and PRs. 
Instead, it runs periodically on a timer to give us some signal without slowing down developer feedback loops. ### Valgrind @@ -221,9 +269,11 @@ These pipelines are configured in `/concourse/nokogiri.yml` and `nokogiri-pr.yml We rely heavily on Valgrind to catch memory bugs by running in combination with every version of CRuby. We use suppressions, too -- because some Rubies seem to have memory issues? See the files in the `/suppressions` directory and `/rakelib/test.rake` for more information. -### TruffleRuby +### Conventions -As of 2021-02, TruffleRuby tests are in a separate pipeline because they are failing in known ways that we haven't addressed yet, mostly related to error handling in SAX callbacks due to Sulong limitations. +- Always checkout the source code including submodules (for the html5lib tests) +- When testing packaged libraries (not system libraries), cache either `ports/` (for compiled libraries) or `ports/archives/` (for just tarballs) + - note that `libgumbo` is built outside of `ports/` to allow us to do this caching safely ## Building gems @@ -249,6 +299,20 @@ There's a `Vagrantfile` in the project root which I've used once or twice to try The `Rakefile` used to be a big fat mess. It's now decomposed into a small set of files in `/rakelib`. If you've got a new rake task you'd like to introduce, please consider whether it belongs in one of the existing concerns, or needs a new file. Please don't add it to `Rakefile` without compelling reasons. +## Making a release + +A quick checklist: + +- [ ] make sure CI is green! 
+- [ ] update `CHANGELOG.md` and `lib/nokogiri/version/constant.rb` +- [ ] create a git tag +- [ ] run `scripts/build-gems` and make sure it completes and all the tests pass +- [ ] `for g in gems/*.gem ; do gem push $g ; done` +- [ ] create a release at https://github.com/sparklemotion/nokogiri/releases and provide sha2 checksums +- [ ] if security-related, email ruby-security-ann@googlegroups.com and ruby-talk@ruby-lang.org +- [ ] update nokogiri.org + + ## Code of Conduct Our full Code of Conduct is in [`CODE_OF_CONDUCT.md`](CODE_OF_CONDUCT.md). diff --git a/Gemfile b/Gemfile index b4e2a20bb6..be173b205f 100644 --- a/Gemfile +++ b/Gemfile @@ -1,3 +1,5 @@ +# frozen_string_literal: true + source "https://rubygems.org" gemspec diff --git a/LICENSE-DEPENDENCIES.md b/LICENSE-DEPENDENCIES.md index f220b44def..d6811cccfe 100644 --- a/LICENSE-DEPENDENCIES.md +++ b/LICENSE-DEPENDENCIES.md @@ -4,81 +4,91 @@ Nokogiri ships with some third party dependencies, which are listed here along w Note that this document is broken into multiple sections, each of which describes the dependencies of a different "platform release" of Nokogiri. 
+ + -- [Default platform release ("ruby")](#default-platform-release-ruby) -- [Native LinuxⓇ platform releases ("x86_64-linux" and "arm64-linux")](#native-linux%E2%93%A1-platform-releases-x86_64-linux-and-arm64-linux) -- [Native Darwin (macOSⓇ) platform releases ("x86_64-darwin" and "arm64-darwin")](#native-darwin-macos%E2%93%A1-platform-releases-x86_64-darwin-and-arm64-darwin) -- [Native WindowsⓇ platform releases ("x86-mingw32" and "x64-mingw32")](#native-windows%E2%93%A1-platform-releases-x86-mingw32-and-x64-mingw32) -- [JavaⓇ (JRuby) platform release ("java")](#java%E2%93%A1-jruby-platform-release-java) +- [Platform Releases](#platform-releases) + * [Default platform release ("ruby")](#default-platform-release-ruby) + * [Native LinuxⓇ platform releases ("x86_64-linux" and "arm64-linux")](#native-linux%E2%93%A1-platform-releases-x86_64-linux-and-arm64-linux) + * [Native Darwin (macOSⓇ) platform releases ("x86_64-darwin" and "arm64-darwin")](#native-darwin-macos%E2%93%A1-platform-releases-x86_64-darwin-and-arm64-darwin) + * [Native WindowsⓇ platform releases ("x86-mingw32" and "x64-mingw32")](#native-windows%E2%93%A1-platform-releases-x86-mingw32-and-x64-mingw32) + * [JavaⓇ (JRuby) platform release ("java")](#java%E2%93%A1-jruby-platform-release-java) - [Appendix: Dependencies' License Texts](#appendix-dependencies-license-texts) - * [libxml2](#libxml2) - * [libxslt](#libxslt) - * [zlib](#zlib) - * [libiconv](#libiconv) - * [isorelax](#isorelax) - * [jing](#jing) - * [nekodtd](#nekodtd) - * [nekohtml](#nekohtml) - * [xalan](#xalan) - * [xerces](#xerces) - * [xml-apis](#xml-apis) + * [libgumbo and nokogumbo](#libgumbo-and-nokogumbo) + * [libxml2](#libxml2) + * [libxslt](#libxslt) + * [zlib](#zlib) + * [libiconv](#libiconv) + * [isorelax](#isorelax) + * [jing](#jing) + * [nekodtd](#nekodtd) + * [nekohtml](#nekohtml) + * [xalan](#xalan) + * [xerces](#xerces) + * [xml-apis](#xml-apis) -It's encouraged for anyone consuming this file via license-tracking software to 
understand which gem file you're downloading and using, so as not to misinterpret the contents of this file and the licenses of the software being distributed. +Anyone consuming this file via license-tracking software should endeavor to understand which gem file you're downloading and using, so as not to misinterpret the contents of this file and the licenses of the software being distributed. You can double-check the dependencies in your gem file by examining the output of `nokogiri -v` after installation, which will emit the complete set of libraries in use (for versions `>= 1.11.0.rc4`). In particular, I'm sure somebody's lawyer, somewhere, is going to freak out that the LGPL appears in this file; and so I'd like to take special note that the dependency covered by LGPL, `libiconv`, is only being redistributed in the native Windows and native Darwin platform releases. It's not present in default, JavaⓇ, or native LinuxⓇ releases. -## Default platform release ("ruby") +## Platform Releases + +### Default platform release ("ruby") The default platform release distributes the following dependencies in source form: - [libxml2](#libxml2) - [libxslt](#libxslt) +- [libgumbo and nokogumbo](#libgumbo-and-nokogumbo) This distribution can be identified by inspecting the included Gem::Specification, which will have the value "ruby" for its "platform" attribute. -## Native LinuxⓇ platform releases ("x86_64-linux" and "arm64-linux") +### Native LinuxⓇ platform releases ("x86_64-linux" and "arm64-linux") The native LinuxⓇ platform release distributes the following dependencies in source form: - [libxml2](#libxml2) - [libxslt](#libxslt) +- [libgumbo and nokogumbo](#libgumbo-and-nokogumbo) - [zlib](#zlib) This distribution can be identified by inspecting the included Gem::Specification, which will have a value similar to "x86_64-linux" or "x86-linux" for its "platform.cpu" attribute. 
-## Native Darwin (macOSⓇ) platform releases ("x86_64-darwin" and "arm64-darwin") +### Native Darwin (macOSⓇ) platform releases ("x86_64-darwin" and "arm64-darwin") The native Darwin platform release distributes the following dependencies in source form: - [libxml2](#libxml2) - [libxslt](#libxslt) +- [libgumbo and nokogumbo](#libgumbo-and-nokogumbo) - [zlib](#zlib) - [libiconv](#libiconv) This distribution can be identified by inspecting the included Gem::Specification, which will have a value similar to "x86_64-darwin" or "arm64-darwin" for its "platform.cpu" attribute. Darwin is also known more familiarly as "OSX" or "macOSⓇ" and is the operating system for many AppleⓇ computers. -## Native WindowsⓇ platform releases ("x86-mingw32" and "x64-mingw32") +### Native WindowsⓇ platform releases ("x86-mingw32" and "x64-mingw32") The native WindowsⓇ platform release distributes the following dependencies in source form: - [libxml2](#libxml2) - [libxslt](#libxslt) +- [libgumbo and nokogumbo](#libgumbo-and-nokogumbo) - [zlib](#zlib) - [libiconv](#libiconv) This distribution can be identified by inspecting the included Gem::Specification, which will have a value similar to "x64-mingw32" or "x86-mingw32" for its "platform.cpu" attribute. -## JavaⓇ (JRuby) platform release ("java") +### JavaⓇ (JRuby) platform release ("java") The Java platform release distributes the following dependencies as compiled jar files: @@ -99,6 +109,217 @@ This section contains a subsection for each potentially-distributed dependency, Please see previous sections to understand which of these potential dependencies is actually distributed in the gem file you're downloading and using. + +### libgumbo and nokogumbo + +Apache 2.0 + +https://github.com/rubys/nokogumbo/blob/f6a7412/LICENSE.txt + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + ### libxml2 MIT diff --git a/LICENSE.md b/LICENSE.md index 50b773dd8c..6a58f6aeda 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ The MIT License -Copyright 2008 -- 2021 by Mike Dalessio, Aaron Patterson, Yoko Harada, Akinori MUSHA, John Shahid, Karol Bucek, Lars Kanis, Sergio Arbeo, Timothy Elliott, Nobuyoshi Nakada, Charles Nutter, Patrick Mahoney. +Copyright 2008 -- 2021 by Mike Dalessio, Aaron Patterson, Yoko Harada, Akinori MUSHA, John Shahid, Karol Bucek, Sam Ruby, Craig Barnes, Stephen Checkoway, Lars Kanis, Sergio Arbeo, Timothy Elliott, Nobuyoshi Nakada, Charles Nutter, Patrick Mahoney. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: diff --git a/README.md b/README.md index 1844ed6821..170ded35f1 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ # Nokogiri -Nokogiri (鋸) makes it easy and painless to work with XML and HTML from Ruby. It provides a sensible, easy-to-understand API for reading, writing, modifying, and querying documents. It is fast and standards-compliant by relying on native parsers like libxml2 (C) and xerces (Java). +Nokogiri (鋸) makes it easy and painless to work with XML and HTML from Ruby. It provides a sensible, easy-to-understand API for [reading](https://nokogiri.org/tutorials/parsing_an_html_xml_document.html), writing, [modifying](https://nokogiri.org/tutorials/modifying_an_html_xml_document.html), and [querying](https://nokogiri.org/tutorials/searching_a_xml_html_document.html) documents. It is fast and standards-compliant by relying on native parsers like libxml2 (CRuby) and xerces (JRuby). 
## Guiding Principles @@ -14,7 +14,7 @@ Some guiding principles Nokogiri tries to follow: ## Features Overview -- DOM Parser for XML and HTML4 +- DOM Parser for XML, HTML4, and HTML5 - SAX Parser for XML and HTML4 - Push Parser for XML and HTML4 - Document search via XPath 1.0 @@ -26,13 +26,13 @@ Some guiding principles Nokogiri tries to follow: ## Status -[![Concourse CI](https://ci.nokogiri.org/api/v1/teams/nokogiri-core/pipelines/nokogiri/jobs/cruby-2.7/badge)](https://ci.nokogiri.org/teams/nokogiri-core/pipelines/nokogiri) +[![Github Actions CI](https://github.com/sparklemotion/nokogiri/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/sparklemotion/nokogiri/actions/workflows/ci.yml) [![Appveyor CI](https://ci.appveyor.com/api/projects/status/xj2pqwvlxwuwgr06/branch/main?svg=true)](https://ci.appveyor.com/project/flavorjones/nokogiri/branch/main) -[![Code Climate](https://codeclimate.com/github/sparklemotion/nokogiri.svg)](https://codeclimate.com/github/sparklemotion/nokogiri) -[![Test Coverage](https://api.codeclimate.com/v1/badges/59c67b0e8976027a45ad/test_coverage)](https://codeclimate.com/github/sparklemotion/nokogiri/test_coverage) [![Gem Version](https://badge.fury.io/rb/nokogiri.svg)](https://rubygems.org/gems/nokogiri) -[![SemVer compatibility](https://api.dependabot.com/badges/compatibility_score?dependency-name=nokogiri&package-manager=bundler&version-scheme=semver)](https://dependabot.com/compatibility-score/?dependency-name=nokogiri&package-manager=bundler) +[![SemVer compatibility](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=nokogiri&package-manager=bundler&previous-version=1.11.7&new-version=1.12.5)](https://docs.github.com/en/code-security/supply-chain-security/managing-vulnerabilities-in-your-projects-dependencies/about-dependabot-security-updates#about-compatibility-scores) + +[![CII Best 
Practices](https://bestpractices.coreinfrastructure.org/projects/5344/badge)](https://bestpractices.coreinfrastructure.org/projects/5344) [![Tidelift dependencies](https://tidelift.com/badges/package/rubygems/nokogiri)](https://tidelift.com/subscription/pkg/rubygems-nokogiri?utm_source=rubygems-nokogiri&utm_medium=referral&utm_campaign=readme) @@ -80,7 +80,7 @@ Full information and description of our security policy is in [`SECURITY.md`](SE ### Semantic Versioning Policy -Nokogiri follows [Semantic Versioning](https://semver.org/) (since 2017 or so). [![Dependabot's SemVer compatibility score for Nokogiri](https://api.dependabot.com/badges/compatibility_score?dependency-name=nokogiri&package-manager=bundler&version-scheme=semver)](https://dependabot.com/compatibility-score/?dependency-name=nokogiri&package-manager=bundler) +Nokogiri follows [Semantic Versioning](https://semver.org/) (since 2017 or so). [![Dependabot's SemVer compatibility score for Nokogiri](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=nokogiri&package-manager=bundler&previous-version=1.11.7&new-version=1.12.5)](https://docs.github.com/en/code-security/supply-chain-security/managing-vulnerabilities-in-your-projects-dependencies/about-dependabot-security-updates#about-compatibility-scores) We bump `Major.Minor.Patch` versions following this guidance: @@ -107,8 +107,8 @@ We bump `Major.Minor.Patch` versions following this guidance: Requirements: -- Ruby >= 2.5 -- JRuby >= 9.2.0.0 +- Ruby >= 2.6 +- JRuby >= 9.3.0.0 ### Native Gems: Faster, more reliable installation @@ -117,12 +117,12 @@ Requirements: ### Supported Platforms -As of v1.11.0, Nokogiri ships pre-compiled, "native" gems for the following platforms: +Nokogiri ships pre-compiled, "native" gems for the following platforms: - Linux: `x86-linux` and `x86_64-linux` (req: `glibc >= 2.17`), including musl platforms like Alpine - Darwin/MacOS: `x86_64-darwin` and `arm64-darwin` - Windows: `x86-mingw32` and 
`x64-mingw32` -- Java: any platform running JRuby 9.2 or higher +- Java: any platform running JRuby 9.3 or higher To determine whether your system supports one of these gems, look at the output of `bundle platform` or `ruby -e 'puts Gem::Platform.local.to_s'`. @@ -271,6 +271,9 @@ Some additional libraries may be distributed with your version of Nokogiri. Plea - Akinori MUSHA - John Shahid - Karol Bucek +- Sam Ruby +- Craig Barnes +- Stephen Checkoway - Lars Kanis - Sergio Arbeo - Timothy Elliott diff --git a/Rakefile b/Rakefile index 4a295c88c7..1638c8625a 100644 --- a/Rakefile +++ b/Rakefile @@ -1,4 +1,5 @@ # frozen_string_literal: true + # # Tasks are all loaded from `rakelib/*.rake`. # You may want to use `rake -T` to see what's available. @@ -6,4 +7,4 @@ require "bundler" NOKOGIRI_SPEC = Bundler.load_gemspec("nokogiri.gemspec") -task default: [:rubocop, :compile, :test] +task default: [:rubocop, :gumbo, :compile, :test] diff --git a/Vagrantfile b/Vagrantfile index 50d95c35af..87056691e3 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -1,3 +1,5 @@ +# frozen_string_literal: true + # -*- mode: ruby -*- # vi: set ft=ruby : @@ -24,10 +26,13 @@ boxen << Box.new("bionic32", "mkorenkov/ubuntu-bionic32", <<~EOF) apt-get install -y libxslt-dev libxml2-dev pkg-config apt-get install -y ruby ruby-dev bundler git EOF +boxen << Box.new("freebsd", "freebsd/FreeBSD-13.0-CURRENT", <<~EOF) + pkg install rbenv ruby-build +EOF Vagrant.configure("2") do |config| boxen.each do |box| - config.vm.define box.shortname do |config| + config.vm.define(box.shortname) do |config| config.vm.box = box.name # Share an additional folder to the guest VM. The first argument is @@ -36,15 +41,15 @@ Vagrant.configure("2") do |config| # argument is a set of non-required options. 
# config.vm.synced_folder "../data", "/vagrant_data" - config.vm.provider "virtualbox" do |vb| - vb.customize ["modifyvm", :id, "--cpus", 2] - vb.customize ["modifyvm", :id, "--memory", 1024] + config.vm.provider("virtualbox") do |vb| + vb.customize(["modifyvm", :id, "--cpus", 2]) + vb.customize(["modifyvm", :id, "--memory", 1024]) end - config.vm.synced_folder ".", "/nokogiri" + config.vm.synced_folder(".", "/nokogiri") if box.provision - config.vm.provision "shell", inline: box.provision + config.vm.provision("shell", inline: box.provision) end end end diff --git a/appveyor.yml b/appveyor.yml index 64f99fd863..f639a3335b 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -22,6 +22,7 @@ install: - ruby --version - gem --version - gem install bundler --conservative + - git submodule update --init - bundle config --local path vendor/bundle - bundle install - IF DEFINED INSTALL_PACKAGES ( ridk exec pacman --noconfirm --needed --sync %INSTALL_PACKAGES% ) @@ -33,36 +34,12 @@ test_script: environment: matrix: - - ruby_version: head-x64 - - ruby_version: head-x64 - INSTALL_PACKAGES: "mingw-w64-x86_64-libxslt" - EXTCONF_PARAMS: "--use-system-libraries" - - - ruby_version: 30-x64 - - ruby_version: 30-x64 - INSTALL_PACKAGES: "mingw-w64-x86_64-libxslt" - EXTCONF_PARAMS: "--use-system-libraries" - - - ruby_version: 27-x64 - - ruby_version: 27-x64 - INSTALL_PACKAGES: "mingw-w64-x86_64-libxslt" - EXTCONF_PARAMS: "--use-system-libraries" - - - ruby_version: 27 - - ruby_version: 27 + - ruby_version: 30 + - ruby_version: 30 INSTALL_PACKAGES: "mingw-w64-i686-libxslt" EXTCONF_PARAMS: "--use-system-libraries" - - ruby_version: 26-x64 - - ruby_version: 26-x64 - INSTALL_PACKAGES: "mingw-w64-x86_64-libxslt" - EXTCONF_PARAMS: "--use-system-libraries" - - - ruby_version: 25-x64 - - ruby_version: 25-x64 - INSTALL_PACKAGES: "mingw-w64-x86_64-libxslt" + - ruby_version: 26 + - ruby_version: 26 + INSTALL_PACKAGES: "mingw-w64-i686-libxslt" EXTCONF_PARAMS: "--use-system-libraries" - -matrix: - 
allow_failures: - - ruby_version: head-x64 diff --git a/bin/nokogiri b/bin/nokogiri index 60b1eb3cea..04a5ceae19 100755 --- a/bin/nokogiri +++ b/bin/nokogiri @@ -1,61 +1,77 @@ #!/usr/bin/env ruby -require 'optparse' -require 'open-uri' -require 'uri' -require 'rubygems' -require 'nokogiri' -autoload :IRB, 'irb' +# frozen_string_literal: true + +require "optparse" +require "open-uri" +require "uri" +require "rubygems" +require "nokogiri" +autoload :IRB, "irb" parse_class = Nokogiri encoding = nil # This module provides some tunables with the nokogiri CLI for use in # your ~/.nokogirirc. -module Nokogiri::CLI - class << self - # Specify the console engine, defaulted to IRB. - # - # call-seq: - # require 'pry' - # Nokogiri::CLI.console = Pry - attr_writer :console - - def console - case @console - when Symbol - Kernel.const_get(@console) - else - @console +module Nokogiri + module CLI + class << self + # Specify the console engine, defaulted to IRB. + # + # call-seq: + # require 'pry' + # Nokogiri::CLI.console = Pry + attr_writer :console + + def console + case @console + when Symbol + Kernel.const_get(@console) + else + @console + end end + + attr_accessor :rcfile end - attr_accessor :rcfile + self.rcfile = File.expand_path("~/.nokogirirc") + self.console = :IRB end +end - self.rcfile = File.expand_path('~/.nokogirirc') - self.console = :IRB +def safe_read(uri_or_path) + uri = URI.parse(uri_or_path) + case uri + when URI::HTTP + uri.read + when URI::File + File.read(uri.path) + else + File.read(uri_or_path) + end end opts = OptionParser.new do |opts| opts.banner = "Nokogiri: an HTML, XML, SAX, and Reader parser" - opts.define_head "Usage: nokogiri [options]" - opts.separator "" - opts.separator "Examples:" - opts.separator " nokogiri https://www.ruby-lang.org/" - opts.separator " nokogiri ./public/index.html" - opts.separator " curl -s http://www.nokogiri.org | nokogiri -e'p $_.css(\"h1\").length'" - opts.separator "" - opts.separator "Options:" + 
opts.define_head("Usage: nokogiri [options]") + opts.separator("") + opts.separator("Examples:") + opts.separator(" nokogiri https://www.ruby-lang.org/") + opts.separator(" nokogiri ./public/index.html") + opts.separator(" curl -s http://www.nokogiri.org | nokogiri -e'p $_.css(\"h1\").length'") + opts.separator("") + opts.separator("Options:") opts.on("--type type", "Parse as type: xml or html (default: auto)", [:xml, :html]) do |v| - parse_class = {:xml => Nokogiri::XML, :html => Nokogiri::HTML}[v] + parse_class = { xml: Nokogiri::XML, html: Nokogiri::HTML }[v] end opts.on("-C file", "Specifies initialization file to load (default #{Nokogiri::CLI.rcfile})") do |v| Nokogiri::CLI.rcfile = v end - opts.on("-E", "--encoding encoding", "Read as encoding (default: #{encoding || 'none'})") do |v| + opts.on("-E", "--encoding encoding", "Read as encoding (default: #{encoding || "none"})") do |v| encoding = v end @@ -64,7 +80,7 @@ opts = OptionParser.new do |opts| end opts.on("--rng ", "Validate using this rng file.") do |v| - @rng = open(v) {|f| Nokogiri::XML::RelaxNG(f)} + @rng = Nokogiri::XML::RelaxNG(safe_read(v)) end opts.on_tail("-?", "--help", "Show this message") do @@ -90,15 +106,10 @@ if File.file?(Nokogiri::CLI.rcfile) load Nokogiri::CLI.rcfile end -if url || $stdin.tty? - case uri = (URI(url) rescue url) - when URI::HTTP - @doc = parse_class.parse(uri.read, url, encoding) - else - @doc = parse_class.parse(open(url).read, nil, encoding) - end +@doc = if url || $stdin.tty? + parse_class.parse(safe_read(url), url, encoding) else - @doc = parse_class.parse($stdin, nil, encoding) + parse_class.parse($stdin, nil, encoding) end $_ = @doc @@ -107,12 +118,14 @@ if @rng @rng.validate(@doc).each do |error| puts error.message end -else - if @script - eval @script, binding, '
' - else - puts "Your document is stored in @doc..." - Nokogiri::CLI.console.start +elsif @script + begin + eval(@script, binding, "
") # rubocop:disable Security/Eval + rescue Exception => e # rubocop:disable Lint/RescueException + warn("ERROR: Exception raised while evaluating '#{@script}'") + raise e end +else + puts "Your document is stored in @doc..." + Nokogiri::CLI.console.start end - diff --git a/concourse/TODO.md b/concourse/TODO.md deleted file mode 100644 index 0578280659..0000000000 --- a/concourse/TODO.md +++ /dev/null @@ -1,48 +0,0 @@ -# nokogiri concourse to-do - -## concourse.yml - -* [x] real ssl cert -* [x] github authentication -* [x] bastion host -* [x] upgrade bbl - -## nokogiri.yml - -* [x] test using system libraries -* [x] handle pull requests -* [x] run windows tests under devkit -* [ ] osx - * system - * system-homebrew - * vendored -* [ ] build an rc gem and upload to rubygems [→ rubygems resource] - * should always check manifest -* install gem and test: - * [ ] osx - * [ ] linux (system) - * [ ] linux (vendored) - * [ ] linux (vendored, --disable-static) - * [ ] openSUSE with site_config (lib64, #1562) - * [ ] windows (fat binary) - * [ ] windows (devkit) -* notifications on failure / success - * [x] irc [→ irc resource] - -## other projects - -* [x] pipeline: mini_portile [→ bosh release] -* [x] pipeline: chromedriver-helper -* [x] bosh release for windows worker config: - * [ ] ruby of all supported versions - * [ ] devkit installed in all rubies - * [ ] cmake -* [x] resource: irc -* [ ] resource: rubygems - -## nokogiri stretch goals - -* [ ] get openbsd / freebsd / etc. 
people to donate worker machines -* [ ] use an S3 bucket for sub-artifacts: - * source tarballs - * compiled .dlls diff --git a/concourse/common_prelude.rb b/concourse/common_prelude.rb deleted file mode 100644 index 6fce7f4d7f..0000000000 --- a/concourse/common_prelude.rb +++ /dev/null @@ -1,6 +0,0 @@ -require "json" - -cross_rubies_path = File.join(File.dirname(__FILE__), "..", ".cross_rubies") -$native_ruby_versions = File.read(cross_rubies_path).split("\n").map do |line| - line.split(":").first.split(".").take(2).join(".") -end.uniq.sort diff --git a/concourse/config/wrap-pull-request-jobs.yml b/concourse/config/wrap-pull-request-jobs.yml deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/concourse/images/Dockerfile.alpine32.erb b/concourse/images/Dockerfile.alpine32.erb deleted file mode 100644 index 382cedc247..0000000000 --- a/concourse/images/Dockerfile.alpine32.erb +++ /dev/null @@ -1,13 +0,0 @@ -FROM i386/ruby:alpine3.12 - -# prelude -RUN apk update -RUN apk add bash build-base - -# valgrind -RUN apk add valgrind - -# libxml-et-al -RUN apk add libxml2-dev libxslt-dev pkgconfig - -<%= File.read "bundle-install.step" %> diff --git a/concourse/images/Dockerfile.jruby.erb b/concourse/images/Dockerfile.jruby.erb deleted file mode 100644 index 49c8ac2ae9..0000000000 --- a/concourse/images/Dockerfile.jruby.erb +++ /dev/null @@ -1,7 +0,0 @@ -FROM jruby:<%= version %>-jdk - -<%= File.read "java-opts.step" %> - -<%= File.read "update-bundler.step" %> - -<%= File.read "bundle-install.step" %> diff --git a/concourse/images/debian-libxml-et-al.step b/concourse/images/debian-libxml-et-al.step deleted file mode 100644 index 5f73f12f3b..0000000000 --- a/concourse/images/debian-libxml-et-al.step +++ /dev/null @@ -1,3 +0,0 @@ -# -*- dockerfile -*- - -RUN apt-get install -y libxslt-dev libxml2-dev pkg-config diff --git a/concourse/images/java-opts.step b/concourse/images/java-opts.step deleted file mode 100644 index cc7d0c7747..0000000000 --- 
a/concourse/images/java-opts.step +++ /dev/null @@ -1,4 +0,0 @@ -# -*- dockerfile -*- - -# https://github.com/docker-library/openjdk/issues/32 -ENV JAVA_OPTS="-Dfile.encoding=UTF8" diff --git a/concourse/nokogiri-pr.yml b/concourse/nokogiri-pr.yml deleted file mode 100644 index 5abe474bc1..0000000000 --- a/concourse/nokogiri-pr.yml +++ /dev/null @@ -1,433 +0,0 @@ -#@ load("@ytt:template", "template") - -#@ load("ruby.star", "cruby_versions") -#@ all_cruby_versions = [] -#@ all_cruby_versions.extend(cruby_versions["supported"]) -#@ all_cruby_versions.extend(cruby_versions["beta"]) - -#@ load("ruby.star", "jruby_versions") -#@ all_jruby_versions = [] -#@ all_jruby_versions.extend(jruby_versions["supported"]) -#@ all_jruby_versions.extend(jruby_versions["beta"]) - -#@ load("ruby.star", "truffleruby_versions") -#@ all_truffleruby_versions = [] -#@ all_truffleruby_versions.extend(truffleruby_versions["supported"]) -#@ all_truffleruby_versions.extend(truffleruby_versions["beta"]) - ---- -#@ def registry_image(image_repo, image_tag): -platform: linux -image_resource: - type: registry-image - source: - repository: #@ image_repo - tag: #@ image_tag -#@ end - ---- -#@ def task_inputs(): -- name: ci -- name: nokogiri-pr - path: nokogiri -#@ end - ---- -#@ def pend_pr(pr_resource_name, context): -put: #@ pr_resource_name -params: - path: #@ pr_resource_name - base_context: ci.nokogiri.org - status: PENDING - context: #@ context - description: "Nokobot is working, beep boop ..." -#@ end - ---- -#@ def put_to_pr_on_everything(pr_resource_name, context): -on_failure: - put: #@ pr_resource_name - params: - path: #@ pr_resource_name - base_context: ci.nokogiri.org - status: FAILURE - context: #@ context - description: "This job had some failed tests." -on_error: - put: #@ pr_resource_name - params: - path: #@ pr_resource_name - base_context: ci.nokogiri.org - status: ERROR - context: #@ context - description: "This job had errors." 
-on_abort: - put: #@ pr_resource_name - params: - path: #@ pr_resource_name - base_context: ci.nokogiri.org - status: ERROR - context: #@ context - description: "This job was terminated." -on_success: - put: #@ pr_resource_name - params: - path: #@ pr_resource_name - base_context: ci.nokogiri.org - status: SUCCESS - context: #@ context - description: "Nokobot is happy with this job." -#@ end - ---- -% require "common_prelude.rb" - -resource_types: - - name: pull-request - type: registry-image - source: - repository: teliaoss/github-pr-resource - - -resources: - - name: ci - type: git - icon: cog - source: - uri: https://github.com/sparklemotion/nokogiri/ - branch: main - disable_ci_skip: true #! always get the latest pipeline configuration - - name: nokogiri-pr - type: pull-request - icon: github - check_every: 15m - webhook_token: ((nokogiri-pr-webhook-token)) - source: - repository: sparklemotion/nokogiri - access_token: ((github-repo-status-access-token)) - ignore_drafts: true - ignore_paths: - - "*.md" - - "concourse/" - - "suppressions/" - - ".github/" - - "Vagrantfile" - - -jobs: -#@ job_name = "rubocop" - - name: #@ job_name - public: true - _: #@ template.replace(put_to_pr_on_everything("nokogiri-pr", job_name)) - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - - #@ pend_pr("nokogiri-pr", job_name) - - task: rubocop - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "bionic")) - inputs: #@ task_inputs() - run: - path: ci/concourse/tasks/rake-test/rubocop.sh - - -#@ job_name = "cruby-on-vanilla-ubuntu" - - name: #@ job_name - public: true - _: #@ template.replace(put_to_pr_on_everything("nokogiri-pr", job_name)) - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["rubocop"] - - #@ pend_pr("nokogiri-pr", job_name) - - in_parallel: - - task: rake-test - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "bionic")) - inputs: #@ task_inputs() - 
params: - TEST_WITH_SYSTEM_LIBRARIES: t - run: - path: ci/concourse/tasks/rake-test/run.sh - - task: rake-test-32bit - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "bionic32")) - inputs: #@ task_inputs() - params: - TEST_WITH_SYSTEM_LIBRARIES: t - run: - path: ci/concourse/tasks/rake-test/run.sh - - -#@ for ruby_version in all_cruby_versions: -#@ job_name = "cruby-{}".format(ruby_version) - - name: #@ job_name - public: true - _: #@ template.replace(put_to_pr_on_everything("nokogiri-pr", job_name)) - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["cruby-on-vanilla-ubuntu"] - - #@ pend_pr("nokogiri-pr", job_name) - - in_parallel: - - task: rake-test-system-libraries - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "mri-{}".format(ruby_version))) - inputs: #@ task_inputs() - params: - TEST_WITH_SYSTEM_LIBRARIES: t - #@ if/end ruby_version == cruby_versions["supported"][-1]: - CC_TEST_REPORTER_ID: ((code_climate_reporter_id_nokogiri)) - run: - path: ci/concourse/tasks/rake-test/run.sh - - task: rake-test-vendored-libraries - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "mri-{}".format(ruby_version))) - inputs: #@ task_inputs() - run: - path: ci/concourse/tasks/rake-test/run.sh - - task: rake-test-valgrind - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "mri-{}".format(ruby_version))) - inputs: #@ task_inputs() - params: - TEST_WITH_VALGRIND: t - run: - path: ci/concourse/tasks/rake-test/run.sh -#@ end - -#@ for ruby_version in all_jruby_versions: -#@ job_name = "jruby-{}".format(ruby_version) - - name: #@ job_name - public: true - #@ if/end ruby_version in jruby_versions["supported"]: - _: #@ template.replace(put_to_pr_on_everything("nokogiri-pr", job_name)) - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: ["cruby-on-vanilla-ubuntu"] - - #@ pend_pr("nokogiri-pr", 
job_name) - - task: rake-test - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "jruby-{}".format(ruby_version))) - inputs: #@ task_inputs() - run: - path: ci/concourse/tasks/rake-test/run.sh -#@ end - -#@ job_name = "cruby-on-musl" - - name: #@ job_name - public: true - _: #@ template.replace(put_to_pr_on_everything("nokogiri-pr", job_name)) - plan: - - get: ci - - get: nokogiri-pr - trigger: true - passed: #@ ["cruby-{}".format(ruby_version) for ruby_version in cruby_versions["supported"]] - - #@ pend_pr("nokogiri-pr", job_name) - - in_parallel: - - task: rake-test-system-libraries - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "alpine")) - inputs: #@ task_inputs() - params: - TEST_WITH_SYSTEM_LIBRARIES: t - run: - path: ci/concourse/tasks/rake-test/run.sh - - task: rake-test-valgrind - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "alpine")) - inputs: #@ task_inputs() - params: - TEST_WITH_VALGRIND: t - run: - path: ci/concourse/tasks/rake-test/run.sh - - -#@ job_name = "cruby-with-libxmlruby" - - name: #@ job_name - public: true - _: #@ template.replace(put_to_pr_on_everything("nokogiri-pr", job_name)) - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: #@ ["cruby-{}".format(ruby_version) for ruby_version in cruby_versions["supported"]] - - #@ pend_pr("nokogiri-pr", job_name) - - in_parallel: - - task: rake-test-system-libraries - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "mri-{}".format(cruby_versions["supported"][-1]))) - inputs: #@ task_inputs() - params: - BUNDLE_GEMFILE: "Gemfile-libxml-ruby" - TEST_WITH_SYSTEM_LIBRARIES: t - run: - path: ci/concourse/tasks/rake-test/run.sh - - task: rake-test-valgrind - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "mri-{}".format(cruby_versions["supported"][-1]))) - inputs: #@ task_inputs() - params: - BUNDLE_GEMFILE: 
"Gemfile-libxml-ruby" - TEST_WITH_VALGRIND: t - run: - path: ci/concourse/tasks/rake-test/run.sh - - -#@ job_name = "cruby-gem-test" - - name: #@ job_name - public: true - _: #@ template.replace(put_to_pr_on_everything("nokogiri-pr", job_name)) - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: #@ ["cruby-{}".format(ruby_version) for ruby_version in cruby_versions["supported"]] - - #@ pend_pr("nokogiri-pr", job_name) - - task: build - config: - "_": #@ template.replace(registry_image("larskanis/rake-compiler-dock-mri-x86_64-linux", "<%= RakeCompilerDock::IMAGE_VERSION %>")) - inputs: #@ task_inputs() - outputs: - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-build.sh - - in_parallel: - - task: install-and-test - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "mri-{}".format(cruby_versions["supported"][-1]))) - inputs: - - #@ template.replace(task_inputs()) - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - - task: install-and-test-on-musl - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "alpine")) - inputs: - - #@ template.replace(task_inputs()) - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - - -#@ job_name = "cruby-native-gem-test" - - name: #@ job_name - public: true - _: #@ template.replace(put_to_pr_on_everything("nokogiri-pr", job_name)) - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: #@ ["cruby-{}".format(ruby_version) for ruby_version in cruby_versions["supported"]] - - #@ pend_pr("nokogiri-pr", job_name) - - task: build - config: - "_": #@ template.replace(registry_image("larskanis/rake-compiler-dock-mri-x86_64-linux", "<%= RakeCompilerDock::IMAGE_VERSION %>")) - inputs: #@ task_inputs() - outputs: - - name: gems - params: - BUILD_NATIVE_GEM: "x86_64-linux" - run: - path: ci/concourse/tasks/gem-test/gem-build.sh - - in_parallel: - <% 
$native_ruby_versions.each do |ruby_version| %> - - task: install-and-test-<%= ruby_version %> - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "mri-<%= ruby_version %>")) - inputs: - - #@ template.replace(task_inputs()) - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - <% end %> - - task: install-and-test-on-musl - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "alpine")) - inputs: - - #@ template.replace(task_inputs()) - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - - -#@ job_name = "cruby-native-gem-test-32bit" - - name: #@ job_name - public: true - _: #@ template.replace(put_to_pr_on_everything("nokogiri-pr", job_name)) - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: #@ ["cruby-{}".format(ruby_version) for ruby_version in cruby_versions["supported"]] - - #@ pend_pr("nokogiri-pr", job_name) - - task: build - config: - "_": #@ template.replace(registry_image("larskanis/rake-compiler-dock-mri-x86-linux", "<%= RakeCompilerDock::IMAGE_VERSION %>")) - inputs: #@ task_inputs() - outputs: - - name: gems - params: - BUILD_NATIVE_GEM: "x86-linux" - run: - path: ci/concourse/tasks/gem-test/gem-build.sh - - in_parallel: - - task: install-and-test-on-vanilla-ubuntu-32bit - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "bionic32")) - inputs: - - #@ template.replace(task_inputs()) - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - - task: install-and-test-on-musl-32bit - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "alpine32")) - inputs: - - #@ template.replace(task_inputs()) - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - - -#@ job_name = "jruby-gem-test" - - name: #@ job_name - public: true - _: #@ template.replace(put_to_pr_on_everything("nokogiri-pr", job_name)) - plan: - 
- get: ci - - get: nokogiri-pr - trigger: true - version: every - passed: #@ ["jruby-{}".format(ruby_version) for ruby_version in jruby_versions["supported"]] - - #@ pend_pr("nokogiri-pr", job_name) - - task: build - config: - "_": #@ template.replace(registry_image("larskanis/rake-compiler-dock-jruby", "<%= RakeCompilerDock::IMAGE_VERSION %>")) - inputs: #@ task_inputs() - outputs: - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-build-java.sh - - task: install-and-test - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "jruby-{}".format(jruby_versions["supported"][-1]))) - inputs: - - #@ template.replace(task_inputs()) - - name: gems - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh diff --git a/concourse/nokogiri-pr.yml.generated b/concourse/nokogiri-pr.yml.generated deleted file mode 100644 index 5f11b032e8..0000000000 --- a/concourse/nokogiri-pr.yml.generated +++ /dev/null @@ -1,1248 +0,0 @@ -jobs: -- name: rubocop - on_abort: - params: - base_context: ci.nokogiri.org - context: rubocop - description: This job was terminated. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_error: - params: - base_context: ci.nokogiri.org - context: rubocop - description: This job had errors. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_failure: - params: - base_context: ci.nokogiri.org - context: rubocop - description: This job had some failed tests. - path: nokogiri-pr - status: FAILURE - put: nokogiri-pr - on_success: - params: - base_context: ci.nokogiri.org - context: rubocop - description: Nokobot is happy with this job. - path: nokogiri-pr - status: SUCCESS - put: nokogiri-pr - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - - params: - base_context: ci.nokogiri.org - context: rubocop - description: Nokobot is working, beep boop ... 
- path: nokogiri-pr - status: PENDING - put: nokogiri-pr - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: bionic - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - platform: linux - run: - path: ci/concourse/tasks/rake-test/rubocop.sh - task: rubocop - public: true -- name: cruby-on-vanilla-ubuntu - on_abort: - params: - base_context: ci.nokogiri.org - context: cruby-on-vanilla-ubuntu - description: This job was terminated. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_error: - params: - base_context: ci.nokogiri.org - context: cruby-on-vanilla-ubuntu - description: This job had errors. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_failure: - params: - base_context: ci.nokogiri.org - context: cruby-on-vanilla-ubuntu - description: This job had some failed tests. - path: nokogiri-pr - status: FAILURE - put: nokogiri-pr - on_success: - params: - base_context: ci.nokogiri.org - context: cruby-on-vanilla-ubuntu - description: Nokobot is happy with this job. - path: nokogiri-pr - status: SUCCESS - put: nokogiri-pr - plan: - - get: ci - - get: nokogiri-pr - passed: - - rubocop - trigger: true - version: every - - params: - base_context: ci.nokogiri.org - context: cruby-on-vanilla-ubuntu - description: Nokobot is working, beep boop ... 
- path: nokogiri-pr - status: PENDING - put: nokogiri-pr - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: bionic - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - TEST_WITH_SYSTEM_LIBRARIES: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: bionic32 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - TEST_WITH_SYSTEM_LIBRARIES: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-32bit - public: true -- name: cruby-2.5 - on_abort: - params: - base_context: ci.nokogiri.org - context: cruby-2.5 - description: This job was terminated. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_error: - params: - base_context: ci.nokogiri.org - context: cruby-2.5 - description: This job had errors. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_failure: - params: - base_context: ci.nokogiri.org - context: cruby-2.5 - description: This job had some failed tests. - path: nokogiri-pr - status: FAILURE - put: nokogiri-pr - on_success: - params: - base_context: ci.nokogiri.org - context: cruby-2.5 - description: Nokobot is happy with this job. - path: nokogiri-pr - status: SUCCESS - put: nokogiri-pr - plan: - - get: ci - - get: nokogiri-pr - passed: - - cruby-on-vanilla-ubuntu - trigger: true - version: every - - params: - base_context: ci.nokogiri.org - context: cruby-2.5 - description: Nokobot is working, beep boop ... 
- path: nokogiri-pr - status: PENDING - put: nokogiri-pr - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.5 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - TEST_WITH_SYSTEM_LIBRARIES: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-system-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.5 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-vendored-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.5 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - TEST_WITH_VALGRIND: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-valgrind - public: true -- name: cruby-2.6 - on_abort: - params: - base_context: ci.nokogiri.org - context: cruby-2.6 - description: This job was terminated. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_error: - params: - base_context: ci.nokogiri.org - context: cruby-2.6 - description: This job had errors. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_failure: - params: - base_context: ci.nokogiri.org - context: cruby-2.6 - description: This job had some failed tests. - path: nokogiri-pr - status: FAILURE - put: nokogiri-pr - on_success: - params: - base_context: ci.nokogiri.org - context: cruby-2.6 - description: Nokobot is happy with this job. - path: nokogiri-pr - status: SUCCESS - put: nokogiri-pr - plan: - - get: ci - - get: nokogiri-pr - passed: - - cruby-on-vanilla-ubuntu - trigger: true - version: every - - params: - base_context: ci.nokogiri.org - context: cruby-2.6 - description: Nokobot is working, beep boop ... 
- path: nokogiri-pr - status: PENDING - put: nokogiri-pr - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.6 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - TEST_WITH_SYSTEM_LIBRARIES: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-system-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.6 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-vendored-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.6 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - TEST_WITH_VALGRIND: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-valgrind - public: true -- name: cruby-2.7 - on_abort: - params: - base_context: ci.nokogiri.org - context: cruby-2.7 - description: This job was terminated. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_error: - params: - base_context: ci.nokogiri.org - context: cruby-2.7 - description: This job had errors. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_failure: - params: - base_context: ci.nokogiri.org - context: cruby-2.7 - description: This job had some failed tests. - path: nokogiri-pr - status: FAILURE - put: nokogiri-pr - on_success: - params: - base_context: ci.nokogiri.org - context: cruby-2.7 - description: Nokobot is happy with this job. - path: nokogiri-pr - status: SUCCESS - put: nokogiri-pr - plan: - - get: ci - - get: nokogiri-pr - passed: - - cruby-on-vanilla-ubuntu - trigger: true - version: every - - params: - base_context: ci.nokogiri.org - context: cruby-2.7 - description: Nokobot is working, beep boop ... 
- path: nokogiri-pr - status: PENDING - put: nokogiri-pr - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.7 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - TEST_WITH_SYSTEM_LIBRARIES: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-system-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.7 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-vendored-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.7 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - TEST_WITH_VALGRIND: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-valgrind - public: true -- name: cruby-3.0 - on_abort: - params: - base_context: ci.nokogiri.org - context: cruby-3.0 - description: This job was terminated. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_error: - params: - base_context: ci.nokogiri.org - context: cruby-3.0 - description: This job had errors. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_failure: - params: - base_context: ci.nokogiri.org - context: cruby-3.0 - description: This job had some failed tests. - path: nokogiri-pr - status: FAILURE - put: nokogiri-pr - on_success: - params: - base_context: ci.nokogiri.org - context: cruby-3.0 - description: Nokobot is happy with this job. - path: nokogiri-pr - status: SUCCESS - put: nokogiri-pr - plan: - - get: ci - - get: nokogiri-pr - passed: - - cruby-on-vanilla-ubuntu - trigger: true - version: every - - params: - base_context: ci.nokogiri.org - context: cruby-3.0 - description: Nokobot is working, beep boop ... 
- path: nokogiri-pr - status: PENDING - put: nokogiri-pr - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-3.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - CC_TEST_REPORTER_ID: ((code_climate_reporter_id_nokogiri)) - TEST_WITH_SYSTEM_LIBRARIES: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-system-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-3.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-vendored-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-3.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - TEST_WITH_VALGRIND: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-valgrind - public: true -- name: jruby-9.2 - on_abort: - params: - base_context: ci.nokogiri.org - context: jruby-9.2 - description: This job was terminated. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_error: - params: - base_context: ci.nokogiri.org - context: jruby-9.2 - description: This job had errors. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_failure: - params: - base_context: ci.nokogiri.org - context: jruby-9.2 - description: This job had some failed tests. - path: nokogiri-pr - status: FAILURE - put: nokogiri-pr - on_success: - params: - base_context: ci.nokogiri.org - context: jruby-9.2 - description: Nokobot is happy with this job. 
- path: nokogiri-pr - status: SUCCESS - put: nokogiri-pr - plan: - - get: ci - - get: nokogiri-pr - passed: - - cruby-on-vanilla-ubuntu - trigger: true - version: every - - params: - base_context: ci.nokogiri.org - context: jruby-9.2 - description: Nokobot is working, beep boop ... - path: nokogiri-pr - status: PENDING - put: nokogiri-pr - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: jruby-9.2 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test - public: true -- name: cruby-on-musl - on_abort: - params: - base_context: ci.nokogiri.org - context: cruby-on-musl - description: This job was terminated. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_error: - params: - base_context: ci.nokogiri.org - context: cruby-on-musl - description: This job had errors. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_failure: - params: - base_context: ci.nokogiri.org - context: cruby-on-musl - description: This job had some failed tests. - path: nokogiri-pr - status: FAILURE - put: nokogiri-pr - on_success: - params: - base_context: ci.nokogiri.org - context: cruby-on-musl - description: Nokobot is happy with this job. - path: nokogiri-pr - status: SUCCESS - put: nokogiri-pr - plan: - - get: ci - - get: nokogiri-pr - passed: - - cruby-2.5 - - cruby-2.6 - - cruby-2.7 - - cruby-3.0 - trigger: true - - params: - base_context: ci.nokogiri.org - context: cruby-on-musl - description: Nokobot is working, beep boop ... 
- path: nokogiri-pr - status: PENDING - put: nokogiri-pr - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: alpine - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - TEST_WITH_SYSTEM_LIBRARIES: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-system-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: alpine - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - TEST_WITH_VALGRIND: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-valgrind - public: true -- name: cruby-with-libxmlruby - on_abort: - params: - base_context: ci.nokogiri.org - context: cruby-with-libxmlruby - description: This job was terminated. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_error: - params: - base_context: ci.nokogiri.org - context: cruby-with-libxmlruby - description: This job had errors. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_failure: - params: - base_context: ci.nokogiri.org - context: cruby-with-libxmlruby - description: This job had some failed tests. - path: nokogiri-pr - status: FAILURE - put: nokogiri-pr - on_success: - params: - base_context: ci.nokogiri.org - context: cruby-with-libxmlruby - description: Nokobot is happy with this job. - path: nokogiri-pr - status: SUCCESS - put: nokogiri-pr - plan: - - get: ci - - get: nokogiri-pr - passed: - - cruby-2.5 - - cruby-2.6 - - cruby-2.7 - - cruby-3.0 - trigger: true - version: every - - params: - base_context: ci.nokogiri.org - context: cruby-with-libxmlruby - description: Nokobot is working, beep boop ... 
- path: nokogiri-pr - status: PENDING - put: nokogiri-pr - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-3.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - BUNDLE_GEMFILE: Gemfile-libxml-ruby - TEST_WITH_SYSTEM_LIBRARIES: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-system-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-3.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - BUNDLE_GEMFILE: Gemfile-libxml-ruby - TEST_WITH_VALGRIND: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-valgrind - public: true -- name: cruby-gem-test - on_abort: - params: - base_context: ci.nokogiri.org - context: cruby-gem-test - description: This job was terminated. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_error: - params: - base_context: ci.nokogiri.org - context: cruby-gem-test - description: This job had errors. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_failure: - params: - base_context: ci.nokogiri.org - context: cruby-gem-test - description: This job had some failed tests. - path: nokogiri-pr - status: FAILURE - put: nokogiri-pr - on_success: - params: - base_context: ci.nokogiri.org - context: cruby-gem-test - description: Nokobot is happy with this job. - path: nokogiri-pr - status: SUCCESS - put: nokogiri-pr - plan: - - get: ci - - get: nokogiri-pr - passed: - - cruby-2.5 - - cruby-2.6 - - cruby-2.7 - - cruby-3.0 - trigger: true - version: every - - params: - base_context: ci.nokogiri.org - context: cruby-gem-test - description: Nokobot is working, beep boop ... 
- path: nokogiri-pr - status: PENDING - put: nokogiri-pr - - config: - image_resource: - name: "" - source: - repository: larskanis/rake-compiler-dock-mri-x86_64-linux - tag: 1.1.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - outputs: - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-build.sh - task: build - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-3.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: alpine - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test-on-musl - public: true -- name: cruby-native-gem-test - on_abort: - params: - base_context: ci.nokogiri.org - context: cruby-native-gem-test - description: This job was terminated. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_error: - params: - base_context: ci.nokogiri.org - context: cruby-native-gem-test - description: This job had errors. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_failure: - params: - base_context: ci.nokogiri.org - context: cruby-native-gem-test - description: This job had some failed tests. - path: nokogiri-pr - status: FAILURE - put: nokogiri-pr - on_success: - params: - base_context: ci.nokogiri.org - context: cruby-native-gem-test - description: Nokobot is happy with this job. 
- path: nokogiri-pr - status: SUCCESS - put: nokogiri-pr - plan: - - get: ci - - get: nokogiri-pr - passed: - - cruby-2.5 - - cruby-2.6 - - cruby-2.7 - - cruby-3.0 - trigger: true - version: every - - params: - base_context: ci.nokogiri.org - context: cruby-native-gem-test - description: Nokobot is working, beep boop ... - path: nokogiri-pr - status: PENDING - put: nokogiri-pr - - config: - image_resource: - name: "" - source: - repository: larskanis/rake-compiler-dock-mri-x86_64-linux - tag: 1.1.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - outputs: - - name: gems - params: - BUILD_NATIVE_GEM: x86_64-linux - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-build.sh - task: build - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.5 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test-2.5 - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.6 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test-2.6 - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.7 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test-2.7 - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-3.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - - name: gems - platform: linux - run: - path: 
ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test-3.0 - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: alpine - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test-on-musl - public: true -- name: cruby-native-gem-test-32bit - on_abort: - params: - base_context: ci.nokogiri.org - context: cruby-native-gem-test-32bit - description: This job was terminated. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_error: - params: - base_context: ci.nokogiri.org - context: cruby-native-gem-test-32bit - description: This job had errors. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_failure: - params: - base_context: ci.nokogiri.org - context: cruby-native-gem-test-32bit - description: This job had some failed tests. - path: nokogiri-pr - status: FAILURE - put: nokogiri-pr - on_success: - params: - base_context: ci.nokogiri.org - context: cruby-native-gem-test-32bit - description: Nokobot is happy with this job. - path: nokogiri-pr - status: SUCCESS - put: nokogiri-pr - plan: - - get: ci - - get: nokogiri-pr - passed: - - cruby-2.5 - - cruby-2.6 - - cruby-2.7 - - cruby-3.0 - trigger: true - version: every - - params: - base_context: ci.nokogiri.org - context: cruby-native-gem-test-32bit - description: Nokobot is working, beep boop ... 
- path: nokogiri-pr - status: PENDING - put: nokogiri-pr - - config: - image_resource: - name: "" - source: - repository: larskanis/rake-compiler-dock-mri-x86-linux - tag: 1.1.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - outputs: - - name: gems - params: - BUILD_NATIVE_GEM: x86-linux - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-build.sh - task: build - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: bionic32 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test-on-vanilla-ubuntu-32bit - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: alpine32 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test-on-musl-32bit - public: true -- name: jruby-gem-test - on_abort: - params: - base_context: ci.nokogiri.org - context: jruby-gem-test - description: This job was terminated. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_error: - params: - base_context: ci.nokogiri.org - context: jruby-gem-test - description: This job had errors. - path: nokogiri-pr - status: ERROR - put: nokogiri-pr - on_failure: - params: - base_context: ci.nokogiri.org - context: jruby-gem-test - description: This job had some failed tests. - path: nokogiri-pr - status: FAILURE - put: nokogiri-pr - on_success: - params: - base_context: ci.nokogiri.org - context: jruby-gem-test - description: Nokobot is happy with this job. 
- path: nokogiri-pr - status: SUCCESS - put: nokogiri-pr - plan: - - get: ci - - get: nokogiri-pr - passed: - - jruby-9.2 - trigger: true - version: every - - params: - base_context: ci.nokogiri.org - context: jruby-gem-test - description: Nokobot is working, beep boop ... - path: nokogiri-pr - status: PENDING - put: nokogiri-pr - - config: - image_resource: - name: "" - source: - repository: larskanis/rake-compiler-dock-jruby - tag: 1.1.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - outputs: - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-build-java.sh - task: build - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: jruby-9.2 - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test - public: true -resource_types: -- name: pull-request - source: - repository: teliaoss/github-pr-resource - type: registry-image -resources: -- icon: cog - name: ci - source: - branch: main - disable_ci_skip: true - uri: https://github.com/sparklemotion/nokogiri/ - type: git -- check_every: 15m0s - icon: github - name: nokogiri-pr - source: - access_token: ((github-repo-status-access-token)) - ignore_drafts: true - ignore_paths: - - '*.md' - - concourse/ - - suppressions/ - - .github/ - - Vagrantfile - repository: sparklemotion/nokogiri - type: pull-request - webhook_token: ((nokogiri-pr-webhook-token)) diff --git a/concourse/nokogiri-truffleruby.yml b/concourse/nokogiri-truffleruby.yml deleted file mode 100644 index 23b62920ac..0000000000 --- a/concourse/nokogiri-truffleruby.yml +++ /dev/null @@ -1,119 +0,0 @@ -#@ load("@ytt:template", "template") - -#@ load("ruby.star", "truffleruby_versions") -#@ all_truffleruby_versions = [] -#@ all_truffleruby_versions.extend(truffleruby_versions["beta"]) -#! 
2021-03-10 omit stable until the changes related to nokogiri are shipping in v22 -#! #@ all_truffleruby_versions.extend(truffleruby_versions["supported"]) - -#@ compile_flag_combos = [["--disable-system-libraries", "--disable-static"], -#@ ["--disable-system-libraries", "--enable-static"], -#@ ["--enable-system-libraries"]] - ---- -#@ def registry_image(image_repo, image_tag): -platform: linux -image_resource: - type: registry-image - source: - repository: #@ image_repo - tag: #@ image_tag -#@ end - ---- -resource_types: - - name: pull-request - type: registry-image - source: - repository: teliaoss/github-pr-resource - - -resources: - - name: ci - type: git - icon: cog - source: - uri: https://github.com/sparklemotion/nokogiri/ - branch: main - disable_ci_skip: true #! always get the latest pipeline configuration - - name: nokogiri - type: git - icon: "github" - check_every: 5m - webhook_token: ((nokogiri-main-webhook-token)) - source: - uri: https://github.com/sparklemotion/nokogiri/ - branch: main - ignore_paths: - - "*.md" - - "concourse/**" - - "suppressions/**" - - ".github/**" - - "Vagrantfile" - - name: nokogiri-pr - type: pull-request - icon: github - check_every: 15m - webhook_token: ((nokogiri-pr-webhook-token)) - source: - repository: sparklemotion/nokogiri - access_token: ((github-repo-status-access-token)) - ignore_drafts: true - ignore_paths: - - "*.md" - - "concourse/" - - "suppressions/" - - ".github/" - - "Vagrantfile" - -jobs: -#@ previous_job_name = None -#@ for ruby_version in all_truffleruby_versions: -#@ job_name = "truffle-{}".format(ruby_version) - - name: #@ job_name - public: true - plan: - - get: ci - - get: nokogiri - trigger: true - #@ if/end previous_job_name: - passed: #@ [previous_job_name] - #@ for compile_flags in compile_flag_combos: - #@ task_name = ["rake-test"] - #@ task_name.extend(compile_flags) - - task: #@ "_".join(task_name) - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", 
"truffle-{}".format(ruby_version))) - inputs: [{name: ci}, {name: nokogiri}] - params: - COMPILE_FLAGS: #@ " ".join(compile_flags) - run: { path: ci/concourse/tasks/rake-test/run.sh } - #@ end -#@ previous_job_name = job_name -#@ end - -#@ previous_job_name = None -#@ for ruby_version in all_truffleruby_versions: -#@ job_name = "truffle-pr-{}".format(ruby_version) - - name: #@ job_name - public: true - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - #@ if/end previous_job_name: - passed: #@ [previous_job_name] - #@ for compile_flags in compile_flag_combos: - #@ task_name = ["rake-test"] - #@ task_name.extend(compile_flags) - - task: #@ "_".join(task_name) - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "truffle-{}".format(ruby_version))) - inputs: [{name: ci}, {name: nokogiri-pr, path: nokogiri}] - params: - COMPILE_FLAGS: #@ " ".join(compile_flags) - run: { path: ci/concourse/tasks/rake-test/run.sh } - #@ end -#@ previous_job_name = job_name -#@ end diff --git a/concourse/nokogiri-truffleruby.yml.generated b/concourse/nokogiri-truffleruby.yml.generated deleted file mode 100644 index 5f96dd71e1..0000000000 --- a/concourse/nokogiri-truffleruby.yml.generated +++ /dev/null @@ -1,155 +0,0 @@ -jobs: -- name: truffle-nightly - plan: - - get: ci - - get: nokogiri - trigger: true - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: truffle-nightly - type: registry-image - inputs: - - name: ci - - name: nokogiri - params: - COMPILE_FLAGS: --disable-system-libraries --disable-static - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test_--disable-system-libraries_--disable-static - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: truffle-nightly - type: registry-image - inputs: - - name: ci - - name: nokogiri - params: - COMPILE_FLAGS: --disable-system-libraries --enable-static - platform: linux 
- run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test_--disable-system-libraries_--enable-static - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: truffle-nightly - type: registry-image - inputs: - - name: ci - - name: nokogiri - params: - COMPILE_FLAGS: --enable-system-libraries - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test_--enable-system-libraries - public: true -- name: truffle-pr-nightly - plan: - - get: ci - - get: nokogiri-pr - trigger: true - version: every - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: truffle-nightly - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - COMPILE_FLAGS: --disable-system-libraries --disable-static - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test_--disable-system-libraries_--disable-static - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: truffle-nightly - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - COMPILE_FLAGS: --disable-system-libraries --enable-static - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test_--disable-system-libraries_--enable-static - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: truffle-nightly - type: registry-image - inputs: - - name: ci - - name: nokogiri-pr - path: nokogiri - params: - COMPILE_FLAGS: --enable-system-libraries - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test_--enable-system-libraries - public: true -resource_types: -- name: pull-request - source: - repository: teliaoss/github-pr-resource - type: registry-image -resources: -- icon: cog - name: ci - source: - branch: main - disable_ci_skip: true - uri: 
https://github.com/sparklemotion/nokogiri/ - type: git -- check_every: 5m0s - icon: github - name: nokogiri - source: - branch: main - ignore_paths: - - '*.md' - - concourse/** - - suppressions/** - - .github/** - - Vagrantfile - uri: https://github.com/sparklemotion/nokogiri/ - type: git - webhook_token: ((nokogiri-main-webhook-token)) -- check_every: 15m0s - icon: github - name: nokogiri-pr - source: - access_token: ((github-repo-status-access-token)) - ignore_drafts: true - ignore_paths: - - '*.md' - - concourse/ - - suppressions/ - - .github/ - - Vagrantfile - repository: sparklemotion/nokogiri - type: pull-request - webhook_token: ((nokogiri-pr-webhook-token)) diff --git a/concourse/nokogiri.yml b/concourse/nokogiri.yml deleted file mode 100644 index 3200ac84c4..0000000000 --- a/concourse/nokogiri.yml +++ /dev/null @@ -1,349 +0,0 @@ -#@ load("@ytt:template", "template") - -#@ load("ruby.star", "cruby_versions") -#@ all_cruby_versions = [] -#@ all_cruby_versions.extend(cruby_versions["supported"]) -#@ all_cruby_versions.extend(cruby_versions["beta"]) - -#@ load("ruby.star", "jruby_versions") -#@ all_jruby_versions = [] -#@ all_jruby_versions.extend(jruby_versions["supported"]) -#@ all_jruby_versions.extend(jruby_versions["beta"]) - -#@ load("ruby.star", "truffleruby_versions") -#@ all_truffleruby_versions = [] -#@ all_truffleruby_versions.extend(truffleruby_versions["supported"]) -#@ all_truffleruby_versions.extend(truffleruby_versions["beta"]) - ---- -#@ def registry_image(image_repo, image_tag): -platform: linux -image_resource: - type: registry-image - source: - repository: #@ image_repo - tag: #@ image_tag -#@ end - ---- -#@ def task_inputs(): -- name: ci -- name: nokogiri -#@ end - ---- -% require "common_prelude.rb" - -resource_types: - - name: webhook-notification - type: registry-image - source: - repository: flavorjones/webhook-notification-resource - tag: latest - -resources: - - name: ci - type: git - icon: cog - source: - uri: 
https://github.com/sparklemotion/nokogiri/ - branch: main - disable_ci_skip: true #! always get the latest pipeline configuration - - name: nokogiri - type: git - icon: "github" - check_every: 5m - webhook_token: ((nokogiri-main-webhook-token)) - source: - uri: https://github.com/sparklemotion/nokogiri/ - branch: main - ignore_paths: - - "*.md" - - "concourse/**" - - "suppressions/**" - - ".github/**" - - "Vagrantfile" - - -jobs: -#@ job_name = "rubocop" - - name: #@ job_name - public: true - plan: - - get: ci - - get: nokogiri - trigger: true - - task: rubocop - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "bionic")) - inputs: [{name: ci}, {name: nokogiri}] - run: {path: ci/concourse/tasks/rake-test/rubocop.sh} - - -#@ job_name = "cruby-on-vanilla-ubuntu" - - name: #@ job_name - public: true - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["rubocop"] - - in_parallel: - - task: rake-test - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "bionic")) - inputs: [{name: ci}, {name: nokogiri}] - params: {TEST_WITH_SYSTEM_LIBRARIES: t} - run: {path: ci/concourse/tasks/rake-test/run.sh} - - task: rake-test-32bit - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "bionic32")) - inputs: [{name: ci}, {name: nokogiri}] - params: {TEST_WITH_SYSTEM_LIBRARIES: t} - run: {path: ci/concourse/tasks/rake-test/run.sh} - - -#@ for ruby_version in all_cruby_versions: -#@ job_name = "cruby-{}".format(ruby_version) - - name: #@ job_name - public: true - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["cruby-on-vanilla-ubuntu"] - - in_parallel: - - task: rake-test-system-libraries - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "mri-{}".format(ruby_version))) - inputs: [{name: ci}, {name: nokogiri}] - params: - TEST_WITH_SYSTEM_LIBRARIES: t - #@ if ruby_version == cruby_versions["supported"][-1]: - CC_TEST_REPORTER_ID: 
((code_climate_reporter_id_nokogiri)) - GIT_BRANCH: main - #@ end - run: {path: ci/concourse/tasks/rake-test/run.sh} - - task: rake-test-vendored-libraries - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "mri-{}".format(ruby_version))) - inputs: [{name: ci}, {name: nokogiri}] - run: {path: ci/concourse/tasks/rake-test/run.sh} - - task: rake-test-valgrind - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "mri-{}".format(ruby_version))) - inputs: [{name: ci}, {name: nokogiri}] - params: {TEST_WITH_VALGRIND: t} - run: {path: ci/concourse/tasks/rake-test/run.sh} -#@ end - - -#@ for ruby_version in all_jruby_versions: -#@ job_name = "jruby-{}".format(ruby_version) - - name: #@ job_name - public: true - plan: - - get: ci - - get: nokogiri - trigger: true - passed: ["cruby-on-vanilla-ubuntu"] - - task: rake-test - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "jruby-{}".format(ruby_version))) - inputs: [{name: ci}, {name: nokogiri}] - run: {path: ci/concourse/tasks/rake-test/run.sh} -#@ end - - -#@ job_name = "cruby-on-musl" - - name: #@ job_name - public: true - plan: - - get: ci - - get: nokogiri - trigger: true - passed: #@ ["cruby-{}".format(ruby_version) for ruby_version in cruby_versions["supported"]] - - in_parallel: - - task: rake-test-system-libraries - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "alpine")) - inputs: [{name: ci}, {name: nokogiri}] - run: {path: ci/concourse/tasks/rake-test/run.sh} - params: {TEST_WITH_SYSTEM_LIBRARIES: t} - - task: rake-test-valgrind - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "alpine")) - inputs: [{name: ci}, {name: nokogiri}] - run: {path: ci/concourse/tasks/rake-test/run.sh} - params: {TEST_WITH_VALGRIND: t} - - -#@ job_name = "cruby-with-libxmlruby" - - name: #@ job_name - public: true - plan: - - get: ci - - get: nokogiri - trigger: true - passed: #@ 
["cruby-{}".format(ruby_version) for ruby_version in cruby_versions["supported"]] - - in_parallel: - - task: rake-test-system-libraries - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "mri-{}".format(cruby_versions["supported"][-1]))) - inputs: [{name: ci}, {name: nokogiri}] - params: - BUNDLE_GEMFILE: "Gemfile-libxml-ruby" - TEST_WITH_SYSTEM_LIBRARIES: t - run: {path: ci/concourse/tasks/rake-test/run.sh} - - task: rake-test-valgrind - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "mri-{}".format(cruby_versions["supported"][-1]))) - inputs: [{name: ci}, {name: nokogiri}] - params: - BUNDLE_GEMFILE: "Gemfile-libxml-ruby" - TEST_WITH_VALGRIND: t - run: {path: ci/concourse/tasks/rake-test/run.sh} - - -#@ job_name = "cruby-gem-test" - - name: #@ job_name - public: true - plan: - - get: ci - - get: nokogiri - trigger: true - passed: #@ ["cruby-{}".format(ruby_version) for ruby_version in cruby_versions["supported"]] - - task: build - config: - "_": #@ template.replace(registry_image("larskanis/rake-compiler-dock-mri-x86_64-linux", "<%= RakeCompilerDock::IMAGE_VERSION %>")) - inputs: [{name: ci}, {name: nokogiri}] - outputs: [{name: gems}] - run: {path: ci/concourse/tasks/gem-test/gem-build.sh} - - in_parallel: - - task: install-and-test - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "mri-{}".format(cruby_versions["supported"][-1]))) - inputs: - - name: ci - - name: nokogiri - - name: gems - run: {path: ci/concourse/tasks/gem-test/gem-install-and-test.sh} - - task: install-and-test-on-musl - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "alpine")) - inputs: - - name: ci - - name: nokogiri - - name: gems - run: {path: ci/concourse/tasks/gem-test/gem-install-and-test.sh} - - -#@ job_name = "cruby-native-gem-test" - - name: #@ job_name - public: true - plan: - - get: ci - - get: nokogiri - trigger: true - version: every - passed: #@ 
["cruby-{}".format(ruby_version) for ruby_version in cruby_versions["supported"]] - - task: build - config: - "_": #@ template.replace(registry_image("larskanis/rake-compiler-dock-mri-x86_64-linux", "<%= RakeCompilerDock::IMAGE_VERSION %>")) - inputs: [{name: ci}, {name: nokogiri}] - outputs: [{name: gems}] - params: {BUILD_NATIVE_GEM: "x86_64-linux"} - run: {path: ci/concourse/tasks/gem-test/gem-build.sh} - - in_parallel: - <% $native_ruby_versions.each do |ruby_version| %> - - task: install-and-test-<%= ruby_version %> - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "mri-<%= ruby_version %>")) - inputs: - - name: ci - - name: nokogiri - - name: gems - run: {path: ci/concourse/tasks/gem-test/gem-install-and-test.sh} - <% end %> - - task: install-and-test-on-musl - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "alpine")) - inputs: - - name: ci - - name: nokogiri - - name: gems - run: {path: ci/concourse/tasks/gem-test/gem-install-and-test.sh} - - -#@ job_name = "cruby-native-gem-test-32bit" - - name: #@ job_name - public: true - plan: - - get: ci - - get: nokogiri - trigger: true - version: every - passed: #@ ["cruby-{}".format(ruby_version) for ruby_version in cruby_versions["supported"]] - - task: build - config: - "_": #@ template.replace(registry_image("larskanis/rake-compiler-dock-mri-x86-linux", "<%= RakeCompilerDock::IMAGE_VERSION %>")) - inputs: [{name: ci}, {name: nokogiri}] - outputs: [{name: gems}] - params: {BUILD_NATIVE_GEM: "x86-linux"} - run: {path: ci/concourse/tasks/gem-test/gem-build.sh} - - in_parallel: - - task: install-and-test-on-vanilla-ubuntu-32bit - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "bionic32")) - inputs: - - name: ci - - name: nokogiri - - name: gems - run: {path: ci/concourse/tasks/gem-test/gem-install-and-test.sh} - - task: install-and-test-on-musl-32bit - config: - "_": #@ 
template.replace(registry_image("flavorjones/nokogiri-test", "alpine32")) - inputs: - - name: ci - - name: nokogiri - - name: gems - run: {path: ci/concourse/tasks/gem-test/gem-install-and-test.sh} - - -#@ job_name = "jruby-gem-test" - - name: #@ job_name - public: true - plan: - - get: ci - - get: nokogiri - trigger: true - passed: #@ ["jruby-{}".format(ruby_version) for ruby_version in jruby_versions["supported"]] - - task: build - config: - "_": #@ template.replace(registry_image("larskanis/rake-compiler-dock-jruby", "<%= RakeCompilerDock::IMAGE_VERSION %>")) - inputs: [{name: ci}, {name: nokogiri}] - outputs: [{name: gems}] - run: {path: ci/concourse/tasks/gem-test/gem-build-java.sh} - - task: install-and-test - config: - "_": #@ template.replace(registry_image("flavorjones/nokogiri-test", "jruby-{}".format(jruby_versions["supported"][-1]))) - inputs: - - name: ci - - name: nokogiri - - name: gems - run: {path: ci/concourse/tasks/gem-test/gem-install-and-test.sh} - - - - name: build-success - public: true - disable_manual_trigger: true - plan: - - get: nokogiri - trigger: true - version: every - passed: - - cruby-on-musl - - cruby-with-libxmlruby - - cruby-gem-test - - cruby-native-gem-test - - jruby-gem-test diff --git a/concourse/nokogiri.yml.generated b/concourse/nokogiri.yml.generated deleted file mode 100644 index 5432443b2d..0000000000 --- a/concourse/nokogiri.yml.generated +++ /dev/null @@ -1,711 +0,0 @@ -jobs: -- name: rubocop - plan: - - get: ci - - get: nokogiri - trigger: true - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: bionic - type: registry-image - inputs: - - name: ci - - name: nokogiri - platform: linux - run: - path: ci/concourse/tasks/rake-test/rubocop.sh - task: rubocop - public: true -- name: cruby-on-vanilla-ubuntu - plan: - - get: ci - - get: nokogiri - passed: - - rubocop - trigger: true - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: 
flavorjones/nokogiri-test - tag: bionic - type: registry-image - inputs: - - name: ci - - name: nokogiri - params: - TEST_WITH_SYSTEM_LIBRARIES: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: bionic32 - type: registry-image - inputs: - - name: ci - - name: nokogiri - params: - TEST_WITH_SYSTEM_LIBRARIES: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-32bit - public: true -- name: cruby-2.5 - plan: - - get: ci - - get: nokogiri - passed: - - cruby-on-vanilla-ubuntu - trigger: true - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.5 - type: registry-image - inputs: - - name: ci - - name: nokogiri - params: - TEST_WITH_SYSTEM_LIBRARIES: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-system-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.5 - type: registry-image - inputs: - - name: ci - - name: nokogiri - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-vendored-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.5 - type: registry-image - inputs: - - name: ci - - name: nokogiri - params: - TEST_WITH_VALGRIND: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-valgrind - public: true -- name: cruby-2.6 - plan: - - get: ci - - get: nokogiri - passed: - - cruby-on-vanilla-ubuntu - trigger: true - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.6 - type: registry-image - inputs: - - name: ci - - name: nokogiri - params: - TEST_WITH_SYSTEM_LIBRARIES: t - platform: linux - run: - path: 
ci/concourse/tasks/rake-test/run.sh - task: rake-test-system-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.6 - type: registry-image - inputs: - - name: ci - - name: nokogiri - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-vendored-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.6 - type: registry-image - inputs: - - name: ci - - name: nokogiri - params: - TEST_WITH_VALGRIND: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-valgrind - public: true -- name: cruby-2.7 - plan: - - get: ci - - get: nokogiri - passed: - - cruby-on-vanilla-ubuntu - trigger: true - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.7 - type: registry-image - inputs: - - name: ci - - name: nokogiri - params: - TEST_WITH_SYSTEM_LIBRARIES: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-system-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.7 - type: registry-image - inputs: - - name: ci - - name: nokogiri - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-vendored-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.7 - type: registry-image - inputs: - - name: ci - - name: nokogiri - params: - TEST_WITH_VALGRIND: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-valgrind - public: true -- name: cruby-3.0 - plan: - - get: ci - - get: nokogiri - passed: - - cruby-on-vanilla-ubuntu - trigger: true - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-3.0 - type: registry-image - inputs: - - name: ci 
- - name: nokogiri - params: - CC_TEST_REPORTER_ID: ((code_climate_reporter_id_nokogiri)) - GIT_BRANCH: main - TEST_WITH_SYSTEM_LIBRARIES: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-system-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-3.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-vendored-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-3.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri - params: - TEST_WITH_VALGRIND: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-valgrind - public: true -- name: jruby-9.2 - plan: - - get: ci - - get: nokogiri - passed: - - cruby-on-vanilla-ubuntu - trigger: true - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: jruby-9.2 - type: registry-image - inputs: - - name: ci - - name: nokogiri - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test - public: true -- name: cruby-on-musl - plan: - - get: ci - - get: nokogiri - passed: - - cruby-2.5 - - cruby-2.6 - - cruby-2.7 - - cruby-3.0 - trigger: true - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: alpine - type: registry-image - inputs: - - name: ci - - name: nokogiri - params: - TEST_WITH_SYSTEM_LIBRARIES: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-system-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: alpine - type: registry-image - inputs: - - name: ci - - name: nokogiri - params: - TEST_WITH_VALGRIND: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: 
rake-test-valgrind - public: true -- name: cruby-with-libxmlruby - plan: - - get: ci - - get: nokogiri - passed: - - cruby-2.5 - - cruby-2.6 - - cruby-2.7 - - cruby-3.0 - trigger: true - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-3.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri - params: - BUNDLE_GEMFILE: Gemfile-libxml-ruby - TEST_WITH_SYSTEM_LIBRARIES: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-system-libraries - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-3.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri - params: - BUNDLE_GEMFILE: Gemfile-libxml-ruby - TEST_WITH_VALGRIND: t - platform: linux - run: - path: ci/concourse/tasks/rake-test/run.sh - task: rake-test-valgrind - public: true -- name: cruby-gem-test - plan: - - get: ci - - get: nokogiri - passed: - - cruby-2.5 - - cruby-2.6 - - cruby-2.7 - - cruby-3.0 - trigger: true - - config: - image_resource: - name: "" - source: - repository: larskanis/rake-compiler-dock-mri-x86_64-linux - tag: 1.1.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri - outputs: - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-build.sh - task: build - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-3.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: alpine - type: registry-image - inputs: - - name: ci - - name: nokogiri - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test-on-musl - 
public: true -- name: cruby-native-gem-test - plan: - - get: ci - - get: nokogiri - passed: - - cruby-2.5 - - cruby-2.6 - - cruby-2.7 - - cruby-3.0 - trigger: true - version: every - - config: - image_resource: - name: "" - source: - repository: larskanis/rake-compiler-dock-mri-x86_64-linux - tag: 1.1.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri - outputs: - - name: gems - params: - BUILD_NATIVE_GEM: x86_64-linux - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-build.sh - task: build - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.5 - type: registry-image - inputs: - - name: ci - - name: nokogiri - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test-2.5 - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.6 - type: registry-image - inputs: - - name: ci - - name: nokogiri - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test-2.6 - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-2.7 - type: registry-image - inputs: - - name: ci - - name: nokogiri - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test-2.7 - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: mri-3.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test-3.0 - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: alpine - type: registry-image - inputs: - - name: ci - - name: nokogiri - - name: gems - platform: linux - run: - path: 
ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test-on-musl - public: true -- name: cruby-native-gem-test-32bit - plan: - - get: ci - - get: nokogiri - passed: - - cruby-2.5 - - cruby-2.6 - - cruby-2.7 - - cruby-3.0 - trigger: true - version: every - - config: - image_resource: - name: "" - source: - repository: larskanis/rake-compiler-dock-mri-x86-linux - tag: 1.1.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri - outputs: - - name: gems - params: - BUILD_NATIVE_GEM: x86-linux - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-build.sh - task: build - - in_parallel: - steps: - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: bionic32 - type: registry-image - inputs: - - name: ci - - name: nokogiri - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test-on-vanilla-ubuntu-32bit - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: alpine32 - type: registry-image - inputs: - - name: ci - - name: nokogiri - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test-on-musl-32bit - public: true -- name: jruby-gem-test - plan: - - get: ci - - get: nokogiri - passed: - - jruby-9.2 - trigger: true - - config: - image_resource: - name: "" - source: - repository: larskanis/rake-compiler-dock-jruby - tag: 1.1.0 - type: registry-image - inputs: - - name: ci - - name: nokogiri - outputs: - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-build-java.sh - task: build - - config: - image_resource: - name: "" - source: - repository: flavorjones/nokogiri-test - tag: jruby-9.2 - type: registry-image - inputs: - - name: ci - - name: nokogiri - - name: gems - platform: linux - run: - path: ci/concourse/tasks/gem-test/gem-install-and-test.sh - task: install-and-test - public: true 
-- disable_manual_trigger: true - name: build-success - plan: - - get: nokogiri - passed: - - cruby-on-musl - - cruby-with-libxmlruby - - cruby-gem-test - - cruby-native-gem-test - - jruby-gem-test - trigger: true - version: every - public: true -resource_types: -- name: webhook-notification - source: - repository: flavorjones/webhook-notification-resource - tag: latest - type: registry-image -resources: -- icon: cog - name: ci - source: - branch: main - disable_ci_skip: true - uri: https://github.com/sparklemotion/nokogiri/ - type: git -- check_every: 5m0s - icon: github - name: nokogiri - source: - branch: main - ignore_paths: - - '*.md' - - concourse/** - - suppressions/** - - .github/** - - Vagrantfile - uri: https://github.com/sparklemotion/nokogiri/ - type: git - webhook_token: ((nokogiri-main-webhook-token)) diff --git a/concourse/shared/code-climate.sh b/concourse/shared/code-climate.sh deleted file mode 100644 index bacc3061cf..0000000000 --- a/concourse/shared/code-climate.sh +++ /dev/null @@ -1,72 +0,0 @@ -# -# Source this file to have access to two functions: -# -# code-climate-setup -# -# * downloads the CC CLI -# * sets up CC environment variables -# * invokes CC's `before-build` -# -# -# code-climate-shipit -# -# * invokes CC's `after-build` -# -# Note that the env var CC_TEST_REPORTER_ID will need to be set. You -# can find this on your Code Climate project's "Repo Settings" page. -# - -CC_CLI_URI="https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64" -CC_CLI=$(basename ${CC_CLI_URI}) - -function code-climate-setup { - save-option-xtrace-off - - if [ -z "${CC_TEST_REPORTER_ID:-}" ] ; then - echo "WARNING: code-climate-setup: CC_TEST_REPORTER_ID is not set, skipping." 
- else - wget --no-verbose ${CC_CLI_URI} - chmod +x ${CC_CLI} - - export CI_NAME="concourse" - - ./${CC_CLI} env || true - ./${CC_CLI} before-build || true - fi - - restore-option-xtrace -} - -function code-climate-shipit { - save-option-xtrace-off - - if [ -z "${CC_TEST_REPORTER_ID:-}" ] ; then - echo "WARNING: code-climate-shipit: CC_TEST_REPORTER_ID is not set, skipping." - else - ./${CC_CLI} after-build || true - fi - - restore-option-xtrace -} - - -# -# utilities to save and restore the `xtrace` setting so we don't leak credentials -# https://unix.stackexchange.com/questions/310957/how-to-restore-the-value-of-shell-options-like-set-x/310963 -# -OLD_OPTION_XTRACE="" - -function save-option-xtrace { - OLD_OPTION_XTRACE="$(shopt -po xtrace)" - set +x -} - -function save-option-xtrace-off { - save-option-xtrace - set +x -} - -function restore-option-xtrace { - set +vx # suppress the following eval statement - eval "${OLD_OPTION_XTRACE}" -} diff --git a/concourse/tasks/gem-test/gem-build-java.sh b/concourse/tasks/gem-test/gem-build-java.sh deleted file mode 100755 index 4f5ea5fbbc..0000000000 --- a/concourse/tasks/gem-test/gem-build-java.sh +++ /dev/null @@ -1,29 +0,0 @@ -#! 
/usr/bin/env bash - -test -e /etc/os-release && cat /etc/os-release - -cd nokogiri - -set -e -x -u # after the `cd` because of rvm - -OUTPUT_DIR="../gems" - -# inputs from a real git resource will contain this dir, but we may -# run this task via `fly execute` and so we need to do this to avoid -# cleanup, see extconf.rb do_clean -mkdir -p .git - -bundle install --local || bundle install - -# generate a fake version number -bundle exec rake set-version-to-timestamp - -bundle exec rake java gem - -if [ -e ./scripts/test-gem-file-contents ] ; then - ./scripts/test-gem-file-contents pkg/nokogiri*java.gem -fi - -mkdir -p ${OUTPUT_DIR} -cp -v pkg/nokogiri*java.gem ${OUTPUT_DIR} -sha256sum ${OUTPUT_DIR}/* diff --git a/concourse/tasks/gem-test/gem-build.sh b/concourse/tasks/gem-test/gem-build.sh deleted file mode 100755 index e98d4f72f4..0000000000 --- a/concourse/tasks/gem-test/gem-build.sh +++ /dev/null @@ -1,44 +0,0 @@ -#! /usr/bin/env bash - -test -e /etc/os-release && cat /etc/os-release - -if [ -n "${BUILD_NATIVE_GEM:-}" ] ; then - # normally part of rake-compiler-dock runas which we can't easily use in concourse - . 
/etc/rubybashrc - ln -s /usr/local/rake-compiler "$HOME"/.rake-compiler - export RAKE_EXTENSION_TASK_NO_NATIVE=true -fi - -cd nokogiri - -set -e -x -u # after the `cd` because of rvm - -OUTPUT_DIR="../gems" - -# inputs from a real git resource will contain this dir, but we may -# run this task via `fly execute` and so we need to do this to avoid -# cleanup, see extconf.rb do_clean -mkdir -p .git - -bundle install --local || bundle install - -# generate a fake version number -bundle exec rake set-version-to-timestamp - -if [ -n "${BUILD_NATIVE_GEM:-}" ] ; then - bundle exec rake gem:${BUILD_NATIVE_GEM}:builder FORCE_CROSS_COMPILING=true -else - # TODO we're only compiling so that we retrieve libxml2/libxslt - # tarballs, we can do better a couple of different ways - bundle exec rake clean compile - - bundle exec rake gem -fi - -if [[ -e ./scripts/test-gem-file-contents ]] ; then - ./scripts/test-gem-file-contents pkg/nokogiri*.gem -fi - -mkdir -p ${OUTPUT_DIR} -cp -v pkg/nokogiri*.gem ${OUTPUT_DIR} -sha256sum ${OUTPUT_DIR}/* diff --git a/concourse/tasks/gem-test/gem-install-and-test.sh b/concourse/tasks/gem-test/gem-install-and-test.sh deleted file mode 100755 index f110231b4d..0000000000 --- a/concourse/tasks/gem-test/gem-install-and-test.sh +++ /dev/null @@ -1,38 +0,0 @@ -#! 
/usr/bin/env bash - -test -e /etc/os-release && cat /etc/os-release - -set -e -x -u - -pushd gems - - gemfile=$(ls *.gem | head -n1) - sha256sum ${gemfile} - gem install --no-document ${gemfile} - gem list -d nokogiri - nokogiri -v - -popd - -pushd nokogiri - - if [ -n "${BUNDLE_APP_CONFIG:-}" ] ; then - export BUNDLE_CACHE_PATH="${BUNDLE_APP_CONFIG}/cache" - fi - - bundle install --local || bundle install # ensure dependencies are installed - - rm -rf lib ext # ensure we don't use the local files - rake test - - if [[ -e ./scripts/test-gem-installation ]] ; then - ./scripts/test-gem-installation - fi - -popd - -# cd out of the dir with the Gemfile and Gemfile.lock -# because that's confusing to older versions of rubygems (e.g., bionic32) -if [[ -e nokogiri/scripts/test-nokogumbo-compatibility ]] ; then - nokogiri/scripts/test-nokogumbo-compatibility -fi diff --git a/concourse/tasks/rake-test/rubocop.sh b/concourse/tasks/rake-test/rubocop.sh deleted file mode 100755 index d8979fca61..0000000000 --- a/concourse/tasks/rake-test/rubocop.sh +++ /dev/null @@ -1,10 +0,0 @@ -#! /usr/bin/env bash - -set -e -x -u - -pushd nokogiri - - bundle install --local || bundle install - bundle exec rake rubocop - -popd diff --git a/concourse/tasks/rake-test/run.ps1 b/concourse/tasks/rake-test/run.ps1 deleted file mode 100644 index 473f3eb725..0000000000 --- a/concourse/tasks/rake-test/run.ps1 +++ /dev/null @@ -1,11 +0,0 @@ -. "c:\var\vcap\packages\windows-ruby-dev-tools\prelude.ps1" - -$env:RUBYOPT = "-rdevkit" - -push-location nokogiri - - system-cmd "gem install bundler" - system-cmd "bundle install" - system-cmd "bundle exec rake compile test" - -pop-location diff --git a/concourse/tasks/rake-test/run.sh b/concourse/tasks/rake-test/run.sh deleted file mode 100755 index 60e3ec722f..0000000000 --- a/concourse/tasks/rake-test/run.sh +++ /dev/null @@ -1,40 +0,0 @@ -#! 
/usr/bin/env bash - -test -e /etc/os-release && cat /etc/os-release - -set -e -x -u - -source "$(dirname "$0")/../../shared/code-climate.sh" - -VERSION_INFO=$(ruby -v) -RUBY_ENGINE=$(cut -d" " -f1 <<< "${VERSION_INFO}") -RUBY_VERSION=$(cut -d" " -f2 <<< "${VERSION_INFO}") - -pushd nokogiri - - bundle install --local || bundle install - - if [[ "${TEST_WITH_SYSTEM_LIBRARIES:-}" == "t" ]] ; then - # TODO remove this option, prefer COMPILE_FLAGS instead - export NOKOGIRI_USE_SYSTEM_LIBRARIES=t - fi - - compile_task_args="" - if [[ "${COMPILE_FLAGS:-}" != "" ]] ; then - compile_task_args="-- ${COMPILE_FLAGS}" - fi - - test_task="test" - if [[ "${TEST_WITH_VALGRIND:-}" == "t" ]] ; then - test_task="test:valgrind" # override - # export TESTOPTS="-v" # see more verbose output to help narrow down warnings - fi - - code-climate-setup - - bundle exec rake compile ${compile_task_args} - bundle exec rake ${test_task} - - code-climate-shipit - -popd diff --git a/dependencies.yml b/dependencies.yml index ecd5de4c41..bb07a276e7 100644 --- a/dependencies.yml +++ b/dependencies.yml @@ -1,74 +1,23 @@ libxml2: - version: "2.9.10" - sha256: "aafee193ffb8fe0c82d4afef6ef91972cbaf5feea100edc2f262750611b4be1f" - # manually verified checksum: - # - # $ gpg --verify libxml2-2.9.10.tar.gz.asc ports/archives/libxml2-2.9.10.tar.gz - # gpg: Signature made Wed 30 Oct 2019 03:15:42 PM EDT - # gpg: using RSA key DB46681BB91ADCEA170FA2D415588B26596BEA5D - # gpg: Good signature from "Daniel Veillard (Red Hat work email) " [unknown] - # gpg: aka "Daniel Veillard " [unknown] - # gpg: WARNING: This key is not certified with a trusted signature! - # gpg: There is no indication that the signature belongs to the owner. 
- # Primary key fingerprint: C744 15BA 7C9C 7F78 F02E 1DC3 4606 B8A5 DE95 BC1F - # Subkey fingerprint: DB46 681B B91A DCEA 170F A2D4 1558 8B26 596B EA5D - # - # using this pgp signature: - # - # -----BEGIN PGP SIGNATURE----- - # - # iQEzBAABCAAdFiEE20ZoG7ka3OoXD6LUFViLJllr6l0FAl254V4ACgkQFViLJllr - # 6l0ldAf6Azt4/oKDfMKRd+xaykUrb+34dr2ZRsjRDS1cnelAtL9TCWhE5lOkLI3c - # 3FyNRaLhOEOOluZmKTJYyzS42JSSHDhxGj14gIeyafOjvRhHG3h1m5GvMmvgKWkd - # qzxFrVFSG26iWJxMvxIA88t7M+QHb7ff7xR29ETJscewEmAd3LmZITglK02lWeGz - # LfxfLuakM6RnCUu0dzacJKO0nMOKju+RL/N9bciI/UOhNYEkWqPnzC0GzbvFLqDu - # rM+OvCSewSTziiejpdrUwYXkY5Ui2+cxUbacLauEr8iRLg7xXKqv27NORE4yeQcS - # LgIhxG/qSNfihMS6E1ZO5bK2DbGCZQ== - # =ZNuc - # -----END PGP SIGNATURE----- - # + version: "2.10.3" + sha256: "5d2cc3d78bec3dbe212a9d7fa629ada25a7da928af432c93060ff5c17ee28a9c" + # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.10/libxml2-2.10.3.sha256sum libxslt: - version: "1.1.34" - sha256: "98b1bd46d6792925ad2dfe9a87452ea2adebf69dcb9919ffd55bf926a7f93f7f" - # manually verified checksum: - # - # $ gpg --verify ~/Downloads/libxslt-1.1.34.tar.gz.asc ports/archives/libxslt-1.1.34.tar.gz - # gpg: Signature made Wed 30 Oct 2019 04:02:48 PM EDT - # gpg: using RSA key DB46681BB91ADCEA170FA2D415588B26596BEA5D - # gpg: Good signature from "Daniel Veillard (Red Hat work email) " [unknown] - # gpg: aka "Daniel Veillard " [unknown] - # gpg: WARNING: This key is not certified with a trusted signature! - # gpg: There is no indication that the signature belongs to the owner. 
- # Primary key fingerprint: C744 15BA 7C9C 7F78 F02E 1DC3 4606 B8A5 DE95 BC1F - # Subkey fingerprint: DB46 681B B91A DCEA 170F A2D4 1558 8B26 596B EA5D - # - # using this pgp signature: - # - # -----BEGIN PGP SIGNATURE----- - # - # iQEzBAABCAAdFiEE20ZoG7ka3OoXD6LUFViLJllr6l0FAl257GgACgkQFViLJllr - # 6l2vVggAjJEHmASiS56SxhPOsGqbfBihM66gQFoIymQfMu2430N1GSTkLsfbkJO8 - # 8yBX11NjzK/m9uxwshMW3rVCU7EpL3PUimN3reXdPiQj9hAOAWF1V3BZNevbQC2E - # FCIraioukaidf8sjUG4/sGpK/gOcP/3hYoN0HUoBigCNJjDqhijxM3M3GJJtCASp - # jL4CQbs2OmxW8ixOZbuWEESvFFHUgYRsdZjRVN+GRfSOvJjxypurmYwQ3RjO7JxL - # 2FY8qKQ+xpeID8NV8F5OUEvWBjk1QS133VTqBZNlONdnEtV/og6jNu5k0O/Kvhup - # caR+8TMErOcLr9OgDklO6DoYyAsf9Q== - # =g4i4 - # -----END PGP SIGNATURE----- - # + version: "1.1.37" + sha256: "3a4b27dc8027ccd6146725950336f1ec520928f320f144eb5fa7990ae6123ab4" + # sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.37.sha256sum zlib: - version: "1.2.11" - sha256: "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1" + version: "1.2.13" + sha256: "b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30" # SHA-256 hash provided on http://zlib.net/ libiconv: - version: "1.15" - sha256: "ccf536620a45458d26ba83887a983b96827001e92a13847b45e4925cc8913178" - # gpg: Signature made Fri Feb 3 00:38:12 2017 CET + version: "1.16" + sha256: "e6a1b1b589654277ee790cce3734f07876ac4ccfaecbee8afa0b649cf529cc04" + # gpg: Signature made Fri 26 Apr 2019 03:36:38 PM EDT # gpg: using RSA key 4F494A942E4616C2 - # gpg: Good signature from "Bruno Haible (Open Source Development) " [unknown] - # gpg: WARNING: This key is not certified with a trusted signature! - # gpg: There is no indication that the signature belongs to the owner. + # gpg: Good signature from "Bruno Haible (Open Source Development) " [expired] + # gpg: Note: This key has expired! 
# Primary key fingerprint: 68D9 4D8A AEEA D48A E7DC 5B90 4F49 4A94 2E46 16C2 diff --git a/ext/java/nokogiri/HtmlDocument.java b/ext/java/nokogiri/Html4Document.java similarity index 83% rename from ext/java/nokogiri/HtmlDocument.java rename to ext/java/nokogiri/Html4Document.java index 7b95984793..bf1c660545 100644 --- a/ext/java/nokogiri/HtmlDocument.java +++ b/ext/java/nokogiri/Html4Document.java @@ -18,13 +18,13 @@ import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; /** - * Class for Nokogiri::HTML::Document. + * Class for Nokogiri::HTML4::Document. * * @author sergio * @author Yoko Harada */ -@JRubyClass(name = "Nokogiri::HTML::Document", parent = "Nokogiri::XML::Document") -public class HtmlDocument extends XmlDocument +@JRubyClass(name = "Nokogiri::HTML4::Document", parent = "Nokogiri::XML::Document") +public class Html4Document extends XmlDocument { private static final String DEFAULT_CONTENT_TYPE = "html"; private static final String DEFAULT_PUBLIC_ID = "-//W3C//DTD HTML 4.01//EN"; @@ -33,19 +33,19 @@ public class HtmlDocument extends XmlDocument private String parsed_encoding = null; public - HtmlDocument(Ruby ruby, RubyClass klazz) + Html4Document(Ruby ruby, RubyClass klazz) { super(ruby, klazz); } public - HtmlDocument(Ruby runtime, Document document) + Html4Document(Ruby runtime, Document document) { this(runtime, getNokogiriClass(runtime, "Nokogiri::XML::Document"), document); } public - HtmlDocument(Ruby ruby, RubyClass klazz, Document doc) + Html4Document(Ruby ruby, RubyClass klazz, Document doc) { super(ruby, klazz, doc); } @@ -55,10 +55,10 @@ public class HtmlDocument extends XmlDocument rbNew(ThreadContext context, IRubyObject klazz, IRubyObject[] args) { final Ruby runtime = context.runtime; - HtmlDocument htmlDocument; + Html4Document htmlDocument; try { Document docNode = createNewDocument(runtime); - htmlDocument = (HtmlDocument) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(runtime, (RubyClass) klazz); + htmlDocument = 
(Html4Document) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(runtime, (RubyClass) klazz); htmlDocument.setDocumentNode(context.runtime, docNode); } catch (Exception ex) { throw asRuntimeError(runtime, "couldn't create document: ", ex); @@ -135,13 +135,6 @@ public class HtmlDocument extends XmlDocument return parsed_encoding; } - /* - * call-seq: - * read_io(io, url, encoding, options) - * - * Read the HTML document from +io+ with given +url+, +encoding+, - * and +options+. See Nokogiri::HTML.parse - */ @JRubyMethod(meta = true, required = 4) public static IRubyObject read_io(ThreadContext context, IRubyObject klass, IRubyObject[] args) @@ -151,13 +144,6 @@ public class HtmlDocument extends XmlDocument return ctx.parse(context, (RubyClass) klass, args[1]); } - /* - * call-seq: - * read_memory(string, url, encoding, options) - * - * Read the HTML document contained in +string+ with given +url+, +encoding+, - * and +options+. See Nokogiri::HTML.parse - */ @JRubyMethod(meta = true, required = 4) public static IRubyObject read_memory(ThreadContext context, IRubyObject klass, IRubyObject[] args) diff --git a/ext/java/nokogiri/HtmlElementDescription.java b/ext/java/nokogiri/Html4ElementDescription.java similarity index 90% rename from ext/java/nokogiri/HtmlElementDescription.java rename to ext/java/nokogiri/Html4ElementDescription.java index b96b7ab6e1..88da6237ab 100644 --- a/ext/java/nokogiri/HtmlElementDescription.java +++ b/ext/java/nokogiri/Html4ElementDescription.java @@ -16,12 +16,12 @@ import org.jruby.runtime.builtin.IRubyObject; /** - * Class for Nokogiri::HTML::ElementDescription. + * Class for Nokogiri::HTML4::ElementDescription. 
* * @author Patrick Mahoney */ -@JRubyClass(name = "Nokogiri::HTML::ElementDescription") -public class HtmlElementDescription extends RubyObject +@JRubyClass(name = "Nokogiri::HTML4::ElementDescription") +public class Html4ElementDescription extends RubyObject { /** @@ -38,7 +38,7 @@ public class HtmlElementDescription extends RubyObject protected HTMLElements.Element element; public - HtmlElementDescription(Ruby runtime, RubyClass rubyClass) + Html4ElementDescription(Ruby runtime, RubyClass rubyClass) { super(runtime, rubyClass); } @@ -89,8 +89,8 @@ public class HtmlElementDescription extends RubyObject return context.nil; } - HtmlElementDescription desc = - new HtmlElementDescription(context.getRuntime(), (RubyClass)klazz); + Html4ElementDescription desc = + new Html4ElementDescription(context.getRuntime(), (RubyClass)klazz); desc.element = elem; return desc; } diff --git a/ext/java/nokogiri/HtmlEntityLookup.java b/ext/java/nokogiri/Html4EntityLookup.java similarity index 79% rename from ext/java/nokogiri/HtmlEntityLookup.java rename to ext/java/nokogiri/Html4EntityLookup.java index 2388cbf369..e33f5b2273 100644 --- a/ext/java/nokogiri/HtmlEntityLookup.java +++ b/ext/java/nokogiri/Html4EntityLookup.java @@ -12,16 +12,16 @@ import org.jruby.runtime.builtin.IRubyObject; /** - * Class for Nokogiri::HTML::EntityLookup. + * Class for Nokogiri::HTML4::EntityLookup. 
* * @author Patrick Mahoney */ -@JRubyClass(name = "Nokogiri::HTML::EntityLookup") -public class HtmlEntityLookup extends RubyObject +@JRubyClass(name = "Nokogiri::HTML4::EntityLookup") +public class Html4EntityLookup extends RubyObject { public - HtmlEntityLookup(Ruby runtime, RubyClass rubyClass) + Html4EntityLookup(Ruby runtime, RubyClass rubyClass) { super(runtime, rubyClass); } @@ -41,7 +41,7 @@ public class HtmlEntityLookup extends RubyObject if (val == -1) { return ruby.getNil(); } IRubyObject edClass = - ruby.getClassFromPath("Nokogiri::HTML::EntityDescription"); + ruby.getClassFromPath("Nokogiri::HTML4::EntityDescription"); IRubyObject edObj = invoke(context, edClass, "new", ruby.newFixnum(val), ruby.newString(name), ruby.newString(name + " entity")); diff --git a/ext/java/nokogiri/HtmlSaxParserContext.java b/ext/java/nokogiri/Html4SaxParserContext.java similarity index 84% rename from ext/java/nokogiri/HtmlSaxParserContext.java rename to ext/java/nokogiri/Html4SaxParserContext.java index 96896bdfe9..91f5b0a58e 100644 --- a/ext/java/nokogiri/HtmlSaxParserContext.java +++ b/ext/java/nokogiri/Html4SaxParserContext.java @@ -24,27 +24,27 @@ import static nokogiri.internals.NokogiriHelpers.rubyStringToString; /** - * Class for Nokogiri::HTML::SAX::ParserContext. + * Class for Nokogiri::HTML4::SAX::ParserContext. 
* * @author serabe * @author Patrick Mahoney * @author Yoko Harada */ -@JRubyClass(name = "Nokogiri::HTML::SAX::ParserContext", parent = "Nokogiri::XML::SAX::ParserContext") -public class HtmlSaxParserContext extends XmlSaxParserContext +@JRubyClass(name = "Nokogiri::HTML4::SAX::ParserContext", parent = "Nokogiri::XML::SAX::ParserContext") +public class Html4SaxParserContext extends XmlSaxParserContext { - static HtmlSaxParserContext + static Html4SaxParserContext newInstance(final Ruby runtime, final RubyClass klazz) { - HtmlSaxParserContext instance = new HtmlSaxParserContext(runtime, klazz); + Html4SaxParserContext instance = new Html4SaxParserContext(runtime, klazz); instance.initialize(runtime); return instance; } public - HtmlSaxParserContext(Ruby ruby, RubyClass rubyClass) + Html4SaxParserContext(Ruby ruby, RubyClass rubyClass) { super(ruby, rubyClass); } @@ -68,7 +68,7 @@ public class HtmlSaxParserContext extends XmlSaxParserContext return parser; } catch (SAXException ex) { throw new SAXException( - "Problem while creating HTML SAX Parser: " + ex.toString()); + "Problem while creating HTML4 SAX Parser: " + ex.toString()); } } @@ -79,7 +79,7 @@ public class HtmlSaxParserContext extends XmlSaxParserContext IRubyObject data, IRubyObject encoding) { - HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klazz); + Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klazz); String javaEncoding = findEncodingName(context, encoding); if (javaEncoding != null) { CharSequence input = applyEncoding(rubyStringToString(data.convertToString()), javaEncoding); @@ -231,7 +231,14 @@ static EncodingType get(final int ordinal) IRubyObject data, IRubyObject encoding) { - HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass); + if (!(data instanceof RubyString)) { + throw context.getRuntime().newTypeError("data must be kind_of String"); + } + if (!(encoding 
instanceof RubyString)) { + throw context.getRuntime().newTypeError("data must be kind_of String"); + } + + Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass); ctx.setInputSourceFile(context, data); String javaEncoding = findEncodingName(context, encoding); if (javaEncoding != null) { @@ -247,7 +254,11 @@ static EncodingType get(final int ordinal) IRubyObject data, IRubyObject encoding) { - HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(context.runtime, (RubyClass) klass); + if (!(encoding instanceof RubyFixnum)) { + throw context.getRuntime().newTypeError("encoding must be kind_of String"); + } + + Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass); ctx.setIOInputSource(context, data, context.nil); String javaEncoding = findEncodingName(context, encoding); if (javaEncoding != null) { @@ -258,12 +269,12 @@ static EncodingType get(final int ordinal) /** * Create a new parser context that will read from a raw input stream. - * Meant to be run in a separate thread by HtmlSaxPushParser. + * Meant to be run in a separate thread by Html4SaxPushParser. 
*/ - static HtmlSaxParserContext + static Html4SaxParserContext parse_stream(final Ruby runtime, RubyClass klass, InputStream stream) { - HtmlSaxParserContext ctx = HtmlSaxParserContext.newInstance(runtime, klass); + Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(runtime, klass); ctx.setInputSource(stream); return ctx; } diff --git a/ext/java/nokogiri/HtmlSaxPushParser.java b/ext/java/nokogiri/Html4SaxPushParser.java similarity index 80% rename from ext/java/nokogiri/HtmlSaxPushParser.java rename to ext/java/nokogiri/Html4SaxPushParser.java index b056d1dbfe..8ff7088b6c 100644 --- a/ext/java/nokogiri/HtmlSaxPushParser.java +++ b/ext/java/nokogiri/Html4SaxPushParser.java @@ -1,39 +1,34 @@ package nokogiri; -import static nokogiri.XmlSaxPushParser.terminateExecution; -import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; -import static org.jruby.runtime.Helpers.invoke; - -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.io.IOException; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.FutureTask; -import java.util.concurrent.ThreadFactory; - -import nokogiri.internals.*; - +import nokogiri.internals.ClosedStreamException; +import nokogiri.internals.NokogiriBlockingQueueInputStream; +import nokogiri.internals.NokogiriHelpers; +import nokogiri.internals.ParserContext; import org.jruby.Ruby; import org.jruby.RubyClass; import org.jruby.RubyObject; import org.jruby.anno.JRubyClass; import org.jruby.anno.JRubyMethod; -import org.jruby.exceptions.RaiseException; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.concurrent.*; + +import static 
nokogiri.XmlSaxPushParser.terminateExecution; +import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; +import static org.jruby.runtime.Helpers.invoke; + /** - * Class for Nokogiri::HTML::SAX::PushParser + * Class for Nokogiri::HTML4::SAX::PushParser * * @author * @author Piotr Szmielew - based on Nokogiri::XML::SAX::PushParser */ -@JRubyClass(name = "Nokogiri::HTML::SAX::PushParser") -public class HtmlSaxPushParser extends RubyObject +@JRubyClass(name = "Nokogiri::HTML4::SAX::PushParser") +public class Html4SaxPushParser extends RubyObject { ParserContext.Options options; IRubyObject saxParser; @@ -41,11 +36,11 @@ public class HtmlSaxPushParser extends RubyObject NokogiriBlockingQueueInputStream stream; private ParserTask parserTask = null; - private FutureTask futureTask = null; + private FutureTask futureTask = null; private ExecutorService executor = null; public - HtmlSaxPushParser(Ruby ruby, RubyClass rubyClass) + Html4SaxPushParser(Ruby ruby, RubyClass rubyClass) { super(ruby, rubyClass); } @@ -111,7 +106,7 @@ public class HtmlSaxPushParser extends RubyObject final ByteArrayInputStream data = NokogiriHelpers.stringBytesToStream(chunk); if (data == null) { terminateTask(context.runtime); - throw XmlSyntaxError.createHTMLSyntaxError(context.runtime).toThrowable(); // Nokogiri::HTML::SyntaxError + throw XmlSyntaxError.createHTMLSyntaxError(context.runtime).toThrowable(); // Nokogiri::HTML4::SyntaxError } int errorCount0 = parserTask.getErrorCount(); @@ -134,7 +129,7 @@ public class HtmlSaxPushParser extends RubyObject if (!options.recover && parserTask.getErrorCount() > errorCount0) { terminateTask(context.runtime); - throw parserTask.getLastError(); + throw parserTask.getLastError().toThrowable(); } return this; @@ -149,12 +144,12 @@ public class HtmlSaxPushParser extends RubyObject assert saxParser != null : "saxParser null"; parserTask = new ParserTask(context, saxParser, stream); - futureTask = new FutureTask((Callable) parserTask); + futureTask 
= new FutureTask((Callable) parserTask); executor = Executors.newSingleThreadExecutor(new ThreadFactory() { @Override public Thread newThread(Runnable r) { Thread t = new Thread(r); - t.setName("HtmlSaxPushParser"); + t.setName("Html4SaxPushParser"); t.setDaemon(true); return t; } @@ -187,14 +182,14 @@ public Thread newThread(Runnable r) { futureTask = null; } - private static HtmlSaxParserContext + private static Html4SaxParserContext parse(final Ruby runtime, final InputStream stream) { - RubyClass klazz = getNokogiriClass(runtime, "Nokogiri::HTML::SAX::ParserContext"); - return HtmlSaxParserContext.parse_stream(runtime, klazz, stream); + RubyClass klazz = getNokogiriClass(runtime, "Nokogiri::HTML4::SAX::ParserContext"); + return Html4SaxParserContext.parse_stream(runtime, klazz, stream); } - static class ParserTask extends XmlSaxPushParser.ParserTask /* */ + static class ParserTask extends XmlSaxPushParser.ParserTask /* */ { private @@ -204,10 +199,10 @@ static class ParserTask extends XmlSaxPushParser.ParserTask /* nokogiriClassCache = new HashMap(); nokogiriClassCache.put("Nokogiri::EncodingHandler", (RubyClass)ruby.getClassFromPath("Nokogiri::EncodingHandler")); - nokogiriClassCache.put("Nokogiri::HTML::Document", (RubyClass)ruby.getClassFromPath("Nokogiri::HTML::Document")); - nokogiriClassCache.put("Nokogiri::HTML::ElementDescription", - (RubyClass)ruby.getClassFromPath("Nokogiri::HTML::ElementDescription")); + nokogiriClassCache.put("Nokogiri::HTML4::Document", (RubyClass)ruby.getClassFromPath("Nokogiri::HTML4::Document")); + nokogiriClassCache.put("Nokogiri::HTML4::ElementDescription", + (RubyClass)ruby.getClassFromPath("Nokogiri::HTML4::ElementDescription")); nokogiriClassCache.put("Nokogiri::XML::Attr", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Attr")); nokogiriClassCache.put("Nokogiri::XML::Document", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Document")); nokogiriClassCache.put("Nokogiri::XML::DocumentFragment", @@ -81,7 +81,7 @@ public 
class NokogiriService implements BasicLibraryService RubyModule nokogiri = ruby.defineModule("Nokogiri"); RubyModule xmlModule = nokogiri.defineModuleUnder("XML"); RubyModule xmlSaxModule = xmlModule.defineModuleUnder("SAX"); - RubyModule htmlModule = nokogiri.defineModuleUnder("HTML"); + RubyModule htmlModule = nokogiri.defineModuleUnder("HTML4"); RubyModule htmlSaxModule = htmlModule.defineModuleUnder("SAX"); RubyModule xsltModule = nokogiri.defineModuleUnder("XSLT"); @@ -201,11 +201,11 @@ public class NokogiriService implements BasicLibraryService { RubyClass htmlElemDesc = htmlModule.defineClassUnder("ElementDescription", ruby.getObject(), HTML_ELEMENT_DESCRIPTION_ALLOCATOR); - htmlElemDesc.defineAnnotatedMethods(HtmlElementDescription.class); + htmlElemDesc.defineAnnotatedMethods(Html4ElementDescription.class); RubyClass htmlEntityLookup = htmlModule.defineClassUnder("EntityLookup", ruby.getObject(), HTML_ENTITY_LOOKUP_ALLOCATOR); - htmlEntityLookup.defineAnnotatedMethods(HtmlEntityLookup.class); + htmlEntityLookup.defineAnnotatedMethods(Html4EntityLookup.class); } private void @@ -216,7 +216,7 @@ public class NokogiriService implements BasicLibraryService //RubyModule htmlDoc = html.defineOrGetClassUnder("Document", document); RubyModule htmlDocument = htmlModule.defineClassUnder("Document", xmlDocument, HTML_DOCUMENT_ALLOCATOR); - htmlDocument.defineAnnotatedMethods(HtmlDocument.class); + htmlDocument.defineAnnotatedMethods(Html4Document.class); } private void @@ -231,11 +231,11 @@ public class NokogiriService implements BasicLibraryService RubyClass htmlSaxPushParser = htmlSaxModule.defineClassUnder("PushParser", ruby.getObject(), HTML_SAXPUSHPARSER_ALLOCATOR); - htmlSaxPushParser.defineAnnotatedMethods(HtmlSaxPushParser.class); + htmlSaxPushParser.defineAnnotatedMethods(Html4SaxPushParser.class); RubyClass htmlSaxParserContext = htmlSaxModule.defineClassUnder("ParserContext", xmlSaxParserContext, HTML_SAXPARSER_CONTEXT_ALLOCATOR); - 
htmlSaxParserContext.defineAnnotatedMethods(HtmlSaxParserContext.class); + htmlSaxParserContext.defineAnnotatedMethods(Html4SaxParserContext.class); } private void @@ -255,30 +255,30 @@ public IRubyObject allocate(Ruby runtime, RubyClass klazz) { public static final ObjectAllocator HTML_DOCUMENT_ALLOCATOR = new ObjectAllocator() { - private HtmlDocument htmlDocument = null; + private Html4Document htmlDocument = null; public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (htmlDocument == null) { htmlDocument = new HtmlDocument(runtime, klazz); } + if (htmlDocument == null) { htmlDocument = new Html4Document(runtime, klazz); } try { - HtmlDocument clone = (HtmlDocument) htmlDocument.clone(); + Html4Document clone = (Html4Document) htmlDocument.clone(); clone.setMetaClass(klazz); return clone; } catch (CloneNotSupportedException e) { - return new HtmlDocument(runtime, klazz); + return new Html4Document(runtime, klazz); } } }; private static final ObjectAllocator HTML_SAXPARSER_CONTEXT_ALLOCATOR = new ObjectAllocator() { - private HtmlSaxParserContext htmlSaxParserContext = null; + private Html4SaxParserContext htmlSaxParserContext = null; public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - if (htmlSaxParserContext == null) { htmlSaxParserContext = new HtmlSaxParserContext(runtime, klazz); } + if (htmlSaxParserContext == null) { htmlSaxParserContext = new Html4SaxParserContext(runtime, klazz); } try { - HtmlSaxParserContext clone = (HtmlSaxParserContext) htmlSaxParserContext.clone(); + Html4SaxParserContext clone = (Html4SaxParserContext) htmlSaxParserContext.clone(); clone.setMetaClass(klazz); return clone; } catch (CloneNotSupportedException e) { - return new HtmlSaxParserContext(runtime, klazz); + return new Html4SaxParserContext(runtime, klazz); } } }; @@ -287,7 +287,7 @@ public IRubyObject allocate(Ruby runtime, RubyClass klazz) { new ObjectAllocator() { public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - return new 
HtmlElementDescription(runtime, klazz); + return new Html4ElementDescription(runtime, klazz); } }; @@ -295,7 +295,7 @@ public IRubyObject allocate(Ruby runtime, RubyClass klazz) { new ObjectAllocator() { public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - return new HtmlEntityLookup(runtime, klazz); + return new Html4EntityLookup(runtime, klazz); } }; @@ -571,7 +571,7 @@ public IRubyObject allocate(Ruby runtime, RubyClass klazz) { private static final ObjectAllocator HTML_SAXPUSHPARSER_ALLOCATOR = new ObjectAllocator() { public IRubyObject allocate(Ruby runtime, RubyClass klazz) { - return new HtmlSaxPushParser(runtime, klazz); + return new Html4SaxPushParser(runtime, klazz); } }; diff --git a/ext/java/nokogiri/XmlAttr.java b/ext/java/nokogiri/XmlAttr.java index 6335faa89f..6c0e96636e 100644 --- a/ext/java/nokogiri/XmlAttr.java +++ b/ext/java/nokogiri/XmlAttr.java @@ -117,7 +117,7 @@ public class XmlAttr extends XmlNode String attrName = ((Attr) node).getName(); if (attrName == null) { return context.nil; } - if (node.getNamespaceURI() != null && !(document(context.runtime) instanceof HtmlDocument)) { + if (node.getNamespaceURI() != null && !(document(context.runtime) instanceof Html4Document)) { attrName = NokogiriHelpers.getLocalPart(attrName); if (attrName == null) { return context.nil; } } @@ -137,7 +137,7 @@ public class XmlAttr extends XmlNode isHtml(ThreadContext context) { return document(context).getMetaClass().isKindOfModule(getNokogiriClass(context.getRuntime(), - "Nokogiri::HTML::Document")); + "Nokogiri::HTML4::Document")); } @Override diff --git a/ext/java/nokogiri/XmlDocument.java b/ext/java/nokogiri/XmlDocument.java index 139659bad2..6a3ebeef3a 100644 --- a/ext/java/nokogiri/XmlDocument.java +++ b/ext/java/nokogiri/XmlDocument.java @@ -280,8 +280,8 @@ private static class DocumentBuilderFactoryHolder XmlDocument xmlDocument; try { Document docNode = createNewDocument(runtime); - if 
("Nokogiri::HTML::Document".equals(((RubyClass)klazz).getName())) { - xmlDocument = new HtmlDocument(context.runtime, (RubyClass) klazz, docNode); + if ("Nokogiri::HTML4::Document".equals(((RubyClass)klazz).getName())) { + xmlDocument = new Html4Document(context.runtime, (RubyClass) klazz, docNode); } else { xmlDocument = new XmlDocument(context.runtime, (RubyClass) klazz, docNode); } @@ -443,7 +443,7 @@ private static class DocumentBuilderFactoryHolder return new_root; } if (!(new_root instanceof XmlNode)) { - throw context.runtime.newArgumentError("expected Nokogiri::XML::Node but received " + new_root.getType()); + throw context.runtime.newArgumentError("expected Nokogiri::XML::Node but received " + new_root.getType()); } XmlNode newRoot = asXmlNode(context, new_root); @@ -657,17 +657,17 @@ private static class DocumentBuilderFactoryHolder } String algorithmURI = null; switch (mode) { - case 0: // XML_C14N_1_0 - if (with_comments) { algorithmURI = Canonicalizer.ALGO_ID_C14N_WITH_COMMENTS; } - else { algorithmURI = Canonicalizer.ALGO_ID_C14N_OMIT_COMMENTS; } - break; - case 1: // XML_C14N_EXCLUSIVE_1_0 - if (with_comments) { algorithmURI = Canonicalizer.ALGO_ID_C14N_EXCL_WITH_COMMENTS; } - else { algorithmURI = Canonicalizer.ALGO_ID_C14N_EXCL_OMIT_COMMENTS; } - break; - case 2: // XML_C14N_1_1 = 2 - if (with_comments) { algorithmURI = Canonicalizer.ALGO_ID_C14N11_WITH_COMMENTS; } - else { algorithmURI = Canonicalizer.ALGO_ID_C14N11_OMIT_COMMENTS; } + case 0: // XML_C14N_1_0 + if (with_comments) { algorithmURI = Canonicalizer.ALGO_ID_C14N_WITH_COMMENTS; } + else { algorithmURI = Canonicalizer.ALGO_ID_C14N_OMIT_COMMENTS; } + break; + case 1: // XML_C14N_EXCLUSIVE_1_0 + if (with_comments) { algorithmURI = Canonicalizer.ALGO_ID_C14N_EXCL_WITH_COMMENTS; } + else { algorithmURI = Canonicalizer.ALGO_ID_C14N_EXCL_OMIT_COMMENTS; } + break; + case 2: // XML_C14N_1_1 = 2 + if (with_comments) { algorithmURI = Canonicalizer.ALGO_ID_C14N11_WITH_COMMENTS; } + else { 
algorithmURI = Canonicalizer.ALGO_ID_C14N11_OMIT_COMMENTS; } } try { Canonicalizer canonicalizer = Canonicalizer.getInstance(algorithmURI); diff --git a/ext/java/nokogiri/XmlDocumentFragment.java b/ext/java/nokogiri/XmlDocumentFragment.java index 17c1be9081..4281df113e 100644 --- a/ext/java/nokogiri/XmlDocumentFragment.java +++ b/ext/java/nokogiri/XmlDocumentFragment.java @@ -17,6 +17,7 @@ import org.jruby.RubyString; import org.jruby.anno.JRubyClass; import org.jruby.anno.JRubyMethod; +import org.jruby.runtime.Block; import org.jruby.runtime.Helpers; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; @@ -34,8 +35,6 @@ public class XmlDocumentFragment extends XmlNode { - private XmlElement fragmentContext; - public XmlDocumentFragment(Ruby ruby) { @@ -48,9 +47,9 @@ public class XmlDocumentFragment extends XmlNode super(ruby, klazz); } - @JRubyMethod(name = "new", meta = true, required = 1, optional = 2) + @JRubyMethod(name = "new", meta = true, required = 1, optional = 3) public static IRubyObject - rbNew(ThreadContext context, IRubyObject cls, IRubyObject[] args) + rbNew(ThreadContext context, IRubyObject cls, IRubyObject[] args, Block block) { if (args.length < 1) { throw context.runtime.newArgumentError(args.length, 1); @@ -75,11 +74,7 @@ public class XmlDocumentFragment extends XmlNode fragment.setDocument(context, doc); fragment.setNode(context.runtime, doc.getDocument().createDocumentFragment()); - //TODO: Get namespace definitions from doc. 
- if (args.length == 3 && args[2] != null && args[2] instanceof XmlElement) { - fragment.fragmentContext = (XmlElement)args[2]; - } - Helpers.invoke(context, fragment, "initialize", args); + Helpers.invoke(context, fragment, "initialize", args, block); return fragment; } @@ -158,12 +153,6 @@ public class XmlDocumentFragment extends XmlNode return null; } - public XmlElement - getFragmentContext() - { - return fragmentContext; - } - @Override public void relink_namespace(ThreadContext context) diff --git a/ext/java/nokogiri/XmlElementContent.java b/ext/java/nokogiri/XmlElementContent.java index 10b35c3e9d..0ff13e2da1 100644 --- a/ext/java/nokogiri/XmlElementContent.java +++ b/ext/java/nokogiri/XmlElementContent.java @@ -142,11 +142,11 @@ public IRubyObject value(Ruby runtime) right = runtime.getNil(); switch (type) { - case SEQ: - case OR: - applyGroup(runtime, klass, doc, iter); - default: - // noop + case SEQ: + case OR: + applyGroup(runtime, klass, doc, iter); + default: + // noop } } diff --git a/ext/java/nokogiri/XmlNamespace.java b/ext/java/nokogiri/XmlNamespace.java index 557d6ddd15..25c6adb050 100644 --- a/ext/java/nokogiri/XmlNamespace.java +++ b/ext/java/nokogiri/XmlNamespace.java @@ -80,14 +80,6 @@ public class XmlNamespace extends RubyObject return href; } - void - deleteHref() - { - href = "http://www.w3.org/XML/1998/namespace"; - hrefRuby = null; - attr.getOwnerElement().removeAttributeNode(attr); - } - public static XmlNamespace createFromAttr(Ruby runtime, Attr attr) { diff --git a/ext/java/nokogiri/XmlNode.java b/ext/java/nokogiri/XmlNode.java index 25f072c9f7..ee8d970a9f 100644 --- a/ext/java/nokogiri/XmlNode.java +++ b/ext/java/nokogiri/XmlNode.java @@ -12,6 +12,7 @@ import org.apache.xerces.dom.CoreDocumentImpl; import org.jruby.Ruby; import org.jruby.RubyArray; +import org.jruby.RubyBoolean; import org.jruby.RubyClass; import org.jruby.RubyFixnum; import org.jruby.RubyInteger; @@ -35,6 +36,7 @@ import org.w3c.dom.Node; import 
org.w3c.dom.NodeList; import org.w3c.dom.Text; +import org.w3c.dom.Comment; import nokogiri.internals.HtmlDomParserContext; import nokogiri.internals.NokogiriHelpers; @@ -303,6 +305,14 @@ public class XmlNode extends RubyObject IRubyObject name = args[0]; IRubyObject doc = args[1]; + if (!(doc instanceof XmlNode)) { + throw context.runtime.newArgumentError("document must be a Nokogiri::XML::Node"); + } + if (!(doc instanceof XmlDocument)) { + // TODO: deprecate allowing Node + context.runtime.getWarnings().warn("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri."); + } + Document document = asXmlNode(context, doc).getOwnerDocument(); if (document == null) { throw context.runtime.newArgumentError("node must have owner document"); @@ -421,7 +431,14 @@ public class XmlNode extends RubyObject String nsURI = e.lookupNamespaceURI(prefix); this.node = NokogiriHelpers.renameNode(e, nsURI, e.getNodeName()); - if (nsURI == null || nsURI.isEmpty()) { return; } + if (nsURI == null || nsURI.isEmpty()) { + RubyBoolean ns_inherit = + (RubyBoolean)document(context.runtime).getInstanceVariable("@namespace_inheritance"); + if (ns_inherit.isTrue()) { + set_namespace(context, ((XmlNode)parent(context)).namespace(context)); + } + return; + } String currentPrefix = e.getParentNode().lookupPrefix(nsURI); String currentURI = e.getParentNode().lookupNamespaceURI(prefix); @@ -645,7 +662,7 @@ public class XmlNode extends RubyObject final XmlDocument doc = document(context.runtime); for (int i = 0; i < nodeMap.getLength(); i++) { - if ((doc instanceof HtmlDocument) || !NokogiriHelpers.isNamespace(nodeMap.item(i))) { + if ((doc instanceof Html4Document) || !NokogiriHelpers.isNamespace(nodeMap.item(i))) { attr.append(getCachedNodeOrCreate(runtime, nodeMap.item(i))); } } @@ -811,8 +828,8 @@ public class XmlNode extends RubyObject 
XmlDocument document = document(runtime); if (document == null) { return context.nil; } - if (document instanceof HtmlDocument) { - klass = getNokogiriClass(runtime, "Nokogiri::HTML::Document"); + if (document instanceof Html4Document) { + klass = getNokogiriClass(runtime, "Nokogiri::HTML4::Document"); ctx = new HtmlDomParserContext(runtime, options); ((HtmlDomParserContext) ctx).enableDocumentFragment(); ctx.setStringInputSource(context, str, context.nil); @@ -824,7 +841,7 @@ public class XmlNode extends RubyObject // TODO: for some reason, document.getEncoding() can be null or nil (don't know why) // run `test_parse_with_unparented_html_text_context_node' few times to see this happen - if (document instanceof HtmlDocument && !(document.getEncoding() == null || document.getEncoding().isNil())) { + if (document instanceof Html4Document && !(document.getEncoding() == null || document.getEncoding().isNil())) { HtmlDomParserContext htmlCtx = (HtmlDomParserContext) ctx; htmlCtx.setEncoding(document.getEncoding().asJavaString()); } @@ -1148,7 +1165,7 @@ public class XmlNode extends RubyObject namespace(ThreadContext context) { final XmlDocument doc = document(context.runtime); - if (doc instanceof HtmlDocument) { return context.nil; } + if (doc instanceof Html4Document) { return context.nil; } String namespaceURI = node.getNamespaceURI(); if (namespaceURI == null || namespaceURI.isEmpty()) { @@ -1183,7 +1200,7 @@ public class XmlNode extends RubyObject // updated. 
final XmlDocument doc = document(context.runtime); if (doc == null) { return context.runtime.newEmptyArray(); } - if (doc instanceof HtmlDocument) { return context.runtime.newEmptyArray(); } + if (doc instanceof Html4Document) { return context.runtime.newEmptyArray(); } List namespaces = doc.getNamespaceCache().get(node); return context.runtime.newArray((List) namespaces); @@ -1199,7 +1216,7 @@ public class XmlNode extends RubyObject { final XmlDocument doc = document(context.runtime); if (doc == null) { return context.runtime.newEmptyArray(); } - if (doc instanceof HtmlDocument) { return context.runtime.newEmptyArray(); } + if (doc instanceof Html4Document) { return context.runtime.newEmptyArray(); } Node previousNode; if (node.getNodeType() == Node.ELEMENT_NODE) { @@ -1335,7 +1352,7 @@ public class XmlNode extends RubyObject private boolean isHtmlDoc(ThreadContext context) { - return document(context).getMetaClass().isKindOfModule(getNokogiriClass(context.runtime, "Nokogiri::HTML::Document")); + return document(context).getMetaClass().isKindOfModule(getNokogiriClass(context.runtime, "Nokogiri::HTML4::Document")); } private boolean @@ -1516,61 +1533,65 @@ public class XmlNode extends RubyObject { String type; switch (node.getNodeType()) { - case Node.ELEMENT_NODE: - if (this instanceof XmlElementDecl) { - type = "ELEMENT_DECL"; - } else if (this instanceof XmlAttributeDecl) { - type = "ATTRIBUTE_DECL"; - } else if (this instanceof XmlEntityDecl) { - type = "ENTITY_DECL"; - } else { - type = "ELEMENT_NODE"; - } - break; - case Node.ATTRIBUTE_NODE: - type = "ATTRIBUTE_NODE"; - break; - case Node.TEXT_NODE: - type = "TEXT_NODE"; - break; - case Node.CDATA_SECTION_NODE: - type = "CDATA_SECTION_NODE"; - break; - case Node.ENTITY_REFERENCE_NODE: - type = "ENTITY_REF_NODE"; - break; - case Node.ENTITY_NODE: - type = "ENTITY_NODE"; - break; - case Node.PROCESSING_INSTRUCTION_NODE: - type = "PI_NODE"; - break; - case Node.COMMENT_NODE: - type = "COMMENT_NODE"; - break; - 
case Node.DOCUMENT_NODE: - if (this instanceof HtmlDocument) { - type = "HTML_DOCUMENT_NODE"; - } else { - type = "DOCUMENT_NODE"; - } - break; - case Node.DOCUMENT_TYPE_NODE: - type = "DOCUMENT_TYPE_NODE"; - break; - case Node.DOCUMENT_FRAGMENT_NODE: - type = "DOCUMENT_FRAG_NODE"; - break; - case Node.NOTATION_NODE: - type = "NOTATION_NODE"; - break; - default: - return context.runtime.newFixnum(0); + case Node.ELEMENT_NODE: + if (this instanceof XmlElementDecl) { + type = "ELEMENT_DECL"; + } else if (this instanceof XmlAttributeDecl) { + type = "ATTRIBUTE_DECL"; + } else if (this instanceof XmlEntityDecl) { + type = "ENTITY_DECL"; + } else { + type = "ELEMENT_NODE"; + } + break; + case Node.ATTRIBUTE_NODE: + type = "ATTRIBUTE_NODE"; + break; + case Node.TEXT_NODE: + type = "TEXT_NODE"; + break; + case Node.CDATA_SECTION_NODE: + type = "CDATA_SECTION_NODE"; + break; + case Node.ENTITY_REFERENCE_NODE: + type = "ENTITY_REF_NODE"; + break; + case Node.ENTITY_NODE: + type = "ENTITY_NODE"; + break; + case Node.PROCESSING_INSTRUCTION_NODE: + type = "PI_NODE"; + break; + case Node.COMMENT_NODE: + type = "COMMENT_NODE"; + break; + case Node.DOCUMENT_NODE: + if (this instanceof Html4Document) { + type = "HTML_DOCUMENT_NODE"; + } else { + type = "DOCUMENT_NODE"; + } + break; + case Node.DOCUMENT_TYPE_NODE: + type = "DOCUMENT_TYPE_NODE"; + break; + case Node.DOCUMENT_FRAGMENT_NODE: + type = "DOCUMENT_FRAG_NODE"; + break; + case Node.NOTATION_NODE: + type = "NOTATION_NODE"; + break; + default: + return context.runtime.newFixnum(0); } return getNokogiriClass(context.runtime, "Nokogiri::XML::Node").getConstant(type); } + /* + * NOTE that the behavior of this function is very difference from the CRuby implementation, see + * the docstring in ext/nokogiri/xml_node.c for details. 
+ */ @JRubyMethod public IRubyObject line(ThreadContext context) @@ -1578,7 +1599,10 @@ public class XmlNode extends RubyObject Node root = getOwnerDocument(); int[] counter = new int[1]; count(root, counter); - return RubyFixnum.newFixnum(context.runtime, counter[0] + 1); + // offset of 2: + // - one because humans start counting at 1 not zero + // - one to account for the XML declaration present in the output + return RubyFixnum.newFixnum(context.runtime, counter[0] + 2); } private boolean @@ -1591,9 +1615,14 @@ public class XmlNode extends RubyObject NodeList list = node.getChildNodes(); for (int jchild = 0; jchild < list.getLength(); jchild++) { Node child = list.item(jchild); + String text = null; if (child instanceof Text) { - String text = ((Text)child).getData(); + text = ((Text)child).getData(); + } else if (child instanceof Comment) { + text = ((Comment)child).getData(); + } + if (text != null) { int textLength = text.length(); for (int jchar = 0; jchar < textLength; jchar++) { if (text.charAt(jchar) == '\n') { @@ -1676,23 +1705,23 @@ protected enum AdoptScheme { Node parent = thisNode.getParentNode(); switch (scheme) { - case CHILD: - Node[] children = adoptAsChild(thisNode, otherNode); - if (children.length == 1 && otherNode == children[0]) { + case CHILD: + Node[] children = adoptAsChild(thisNode, otherNode); + if (children.length == 1 && otherNode == children[0]) { + break; + } else { + nodeOrTags = nodeArrayToRubyArray(context.runtime, children); + } + break; + case PREV_SIBLING: + adoptAsPrevSibling(context, parent, thisNode, otherNode); + break; + case NEXT_SIBLING: + adoptAsNextSibling(context, parent, thisNode, otherNode); + break; + case REPLACEMENT: + adoptAsReplacement(context, parent, thisNode, otherNode); break; - } else { - nodeOrTags = nodeArrayToRubyArray(context.runtime, children); - } - break; - case PREV_SIBLING: - adoptAsPrevSibling(context, parent, thisNode, otherNode); - break; - case NEXT_SIBLING: - adoptAsNextSibling(context, 
parent, thisNode, otherNode); - break; - case REPLACEMENT: - adoptAsReplacement(context, parent, thisNode, otherNode); - break; } } catch (Exception e) { throw context.runtime.newRuntimeError(e.toString()); @@ -1743,24 +1772,11 @@ protected enum AdoptScheme { e.appendChild(otherNode); otherNode = e; } else { - addNamespaceURIIfNeeded(otherNode); parent.appendChild(otherNode); } return new Node[] { otherNode }; } - private void - addNamespaceURIIfNeeded(Node child) - { - if (this instanceof XmlDocumentFragment && ((XmlDocumentFragment) this).getFragmentContext() != null) { - XmlElement fragmentContext = ((XmlDocumentFragment) this).getFragmentContext(); - String namespace_uri = fragmentContext.node.getNamespaceURI(); - if (namespace_uri != null && namespace_uri.length() > 0) { - NokogiriHelpers.renameNode(child, namespace_uri, child.getNodeName()); - } - } - } - protected void adoptAsPrevSibling(ThreadContext context, Node parent, diff --git a/ext/java/nokogiri/XmlNodeSet.java b/ext/java/nokogiri/XmlNodeSet.java index 731a5ef8bf..d4b33b3e16 100644 --- a/ext/java/nokogiri/XmlNodeSet.java +++ b/ext/java/nokogiri/XmlNodeSet.java @@ -190,10 +190,6 @@ public class XmlNodeSet extends RubyObject implements NodeList result[last++] = n; } - if (nodeOrNamespace instanceof XmlNamespace) { - ((XmlNamespace) nodeOrNamespace).deleteHref(); - } - nodes = Arrays.copyOf(result, last); if (nodes.length < orig.length) { diff --git a/ext/java/nokogiri/XmlReader.java b/ext/java/nokogiri/XmlReader.java index 122831f6e1..dff65b4a1e 100644 --- a/ext/java/nokogiri/XmlReader.java +++ b/ext/java/nokogiri/XmlReader.java @@ -141,9 +141,17 @@ public class XmlReader extends RubyObject public IRubyObject attribute_nodes(ThreadContext context) { + context.runtime.getWarnings().warn("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. 
Please use Reader#attribute_hash instead."); return currentNode().getAttributesNodes(); } + @JRubyMethod + public IRubyObject + attribute_hash(ThreadContext context) + { + return currentNode().getAttributes(context); + } + @JRubyMethod(name = "attributes?") public IRubyObject attributes_p(ThreadContext context) @@ -184,6 +192,18 @@ public class XmlReader extends RubyObject return RubyBoolean.newBoolean(context.getRuntime(), !readerNode.hasChildren); } + @JRubyMethod + public IRubyObject + encoding(ThreadContext context) + { + IRubyObject constructor_encoding = getInstanceVariable("@encoding"); + if (!constructor_encoding.isNil()) { + return constructor_encoding; + } + // TODO: get the parser's detected encoding + return context.getRuntime().getNil(); + } + @JRubyMethod(meta = true, rest = true) public static IRubyObject from_io(ThreadContext context, IRubyObject cls, IRubyObject args[]) diff --git a/ext/java/nokogiri/XmlSaxParserContext.java b/ext/java/nokogiri/XmlSaxParserContext.java index 920b38e964..8cca652ca0 100644 --- a/ext/java/nokogiri/XmlSaxParserContext.java +++ b/ext/java/nokogiri/XmlSaxParserContext.java @@ -1,33 +1,23 @@ package nokogiri; -import static org.jruby.runtime.Helpers.invoke; - -import java.io.IOException; -import java.io.InputStream; - +import nokogiri.internals.*; import org.apache.xerces.parsers.AbstractSAXParser; import org.jruby.Ruby; import org.jruby.RubyClass; import org.jruby.RubyFixnum; -import org.jruby.RubyModule; -import org.jruby.RubyObjectAdapter; import org.jruby.anno.JRubyClass; import org.jruby.anno.JRubyMethod; import org.jruby.exceptions.RaiseException; -import org.jruby.javasupport.JavaEmbedUtils; +import org.jruby.runtime.Helpers; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; -import org.xml.sax.ContentHandler; -import org.xml.sax.ErrorHandler; import org.xml.sax.SAXException; -import org.xml.sax.SAXNotRecognizedException; -import org.xml.sax.SAXNotSupportedException; import 
org.xml.sax.SAXParseException; -import nokogiri.internals.NokogiriHandler; -import nokogiri.internals.NokogiriHelpers; -import nokogiri.internals.ParserContext; -import nokogiri.internals.XmlSaxParser; +import java.io.IOException; +import java.io.InputStream; + +import static org.jruby.runtime.Helpers.invoke; /** * Base class for the SAX parsers. @@ -51,6 +41,7 @@ public class XmlSaxParserContext extends ParserContext protected AbstractSAXParser parser; protected NokogiriHandler handler; + protected NokogiriErrorHandler errorHandler; private boolean replaceEntities = true; private boolean recovery = false; @@ -139,9 +130,12 @@ public class XmlSaxParserContext extends ParserContext parse_io(ThreadContext context, IRubyObject klazz, IRubyObject data, - IRubyObject enc) + IRubyObject encoding) { - //int encoding = (int)enc.convertToInteger().getLongValue(); + // check the type of the unused encoding to match behavior of CRuby + if (!(encoding instanceof RubyFixnum)) { + throw context.getRuntime().newTypeError("encoding must be kind_of String"); + } final Ruby runtime = context.runtime; XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz); ctx.initialize(runtime); @@ -168,31 +162,12 @@ public class XmlSaxParserContext extends ParserContext return (XmlSaxParserContext) NokogiriService.XML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(runtime, klazz); } - /** - * Set a property of the underlying parser. 
- */ - protected void - setProperty(String key, Object val) - throws SAXNotRecognizedException, SAXNotSupportedException - { - parser.setProperty(key, val); - } - - protected void - setContentHandler(ContentHandler handler) - { - parser.setContentHandler(handler); - } - - protected void - setErrorHandler(ErrorHandler handler) - { - parser.setErrorHandler(handler); - } - public final NokogiriHandler getNokogiriHandler() { return handler; } + public final NokogiriErrorHandler + getNokogiriErrorHandler() { return errorHandler; } + /** * Perform any initialization prior to parsing with the handler * handlerRuby. Convenience hook for subclasses. @@ -223,6 +198,17 @@ public class XmlSaxParserContext extends ParserContext parser.parse(getInputSource()); } + protected static Options + defaultParseOptions(ThreadContext context) + { + return new ParserContext.Options( + RubyFixnum.fix2long(Helpers.invoke(context, + ((RubyClass)context.getRuntime().getClassFromPath("Nokogiri::XML::ParseOptions")) + .getConstant("DEFAULT_XML"), + "to_i")) + ); + } + @JRubyMethod public IRubyObject parse_with(ThreadContext context, IRubyObject handlerRuby) @@ -233,14 +219,19 @@ public class XmlSaxParserContext extends ParserContext throw runtime.newArgumentError("argument must respond_to document"); } - NokogiriHandler handler = this.handler = new NokogiriHandler(runtime, handlerRuby); - preParse(runtime, handlerRuby, handler); + /* TODO: how should we pass in parse options? 
*/ + ParserContext.Options options = defaultParseOptions(context); - setContentHandler(handler); - setErrorHandler(handler); + errorHandler = new NokogiriStrictErrorHandler(runtime, options.noError, options.noWarning); + handler = new NokogiriHandler(runtime, handlerRuby, errorHandler); + + preParse(runtime, handlerRuby, handler); + parser.setContentHandler(handler); + parser.setErrorHandler(handler); + parser.setEntityResolver(new NokogiriEntityResolver(runtime, errorHandler, options)); try { - setProperty("http://xml.org/sax/properties/lexical-handler", handler); + parser.setProperty("http://xml.org/sax/properties/lexical-handler", handler); } catch (Exception ex) { throw runtime.newRuntimeError("Problem while creating XML SAX Parser: " + ex.toString()); } @@ -270,8 +261,6 @@ public class XmlSaxParserContext extends ParserContext postParse(runtime, handlerRuby, handler); - //maybeTrimLeadingAndTrailingWhitespace(context, handlerRuby); - return runtime.getNil(); } @@ -319,53 +308,6 @@ public class XmlSaxParserContext extends ParserContext return context.runtime.newBoolean(recovery); } - /** - * If the handler's document is a FragmentHandler, attempt to trim - * leading and trailing whitespace. - * - * This is a bit hackish and depends heavily on the internals of - * FragmentHandler. 
- */ - protected void - maybeTrimLeadingAndTrailingWhitespace(ThreadContext context, IRubyObject parser) - { - RubyObjectAdapter adapter = JavaEmbedUtils.newObjectAdapter(); - RubyModule mod = context.getRuntime().getClassFromPath("Nokogiri::XML::FragmentHandler"); - - IRubyObject handler = adapter.getInstanceVariable(parser, "@document"); - if (handler == null || handler.isNil() || !adapter.isKindOf(handler, mod)) { - return; - } - IRubyObject stack = adapter.getInstanceVariable(handler, "@stack"); - if (stack == null || stack.isNil()) { - return; - } - // doc is finally a DocumentFragment whose nodes we can check - IRubyObject doc = adapter.callMethod(stack, "first"); - if (doc == null || doc.isNil()) { - return; - } - - IRubyObject children; - - for (;;) { - children = adapter.callMethod(doc, "children"); - IRubyObject first = adapter.callMethod(children, "first"); - if (NokogiriHelpers.isBlank(first)) { adapter.callMethod(first, "unlink"); } - else { break; } - } - - for (;;) { - children = adapter.callMethod(doc, "children"); - IRubyObject last = adapter.callMethod(children, "last"); - if (NokogiriHelpers.isBlank(last)) { adapter.callMethod(last, "unlink"); } - else { break; } - } - - // While we have a document, normalize it. 
- ((XmlNode) doc).normalize(); - } - @JRubyMethod(name = "column") public IRubyObject column(ThreadContext context) @@ -383,5 +325,4 @@ public class XmlSaxParserContext extends ParserContext if (number == null) { return context.getRuntime().getNil(); } return RubyFixnum.newFixnum(context.getRuntime(), number.longValue()); } - } diff --git a/ext/java/nokogiri/XmlSaxPushParser.java b/ext/java/nokogiri/XmlSaxPushParser.java index 8fa419b5e5..81bbb0c5bc 100644 --- a/ext/java/nokogiri/XmlSaxPushParser.java +++ b/ext/java/nokogiri/XmlSaxPushParser.java @@ -1,20 +1,9 @@ package nokogiri; -import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; -import static org.jruby.runtime.Helpers.invoke; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.FutureTask; -import java.util.concurrent.ThreadFactory; - +import nokogiri.internals.*; import org.jruby.Ruby; import org.jruby.RubyClass; +import org.jruby.RubyException; import org.jruby.RubyObject; import org.jruby.anno.JRubyClass; import org.jruby.anno.JRubyMethod; @@ -22,11 +11,14 @@ import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; -import nokogiri.internals.ClosedStreamException; -import nokogiri.internals.NokogiriBlockingQueueInputStream; -import nokogiri.internals.NokogiriHandler; -import nokogiri.internals.NokogiriHelpers; -import nokogiri.internals.ParserContext; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import java.util.concurrent.*; + +import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; +import static org.jruby.runtime.Helpers.invoke; /** * Class for Nokogiri::XML::SAX::PushParser @@ -159,7 +151,8 @@ public class XmlSaxPushParser extends 
RubyObject if (!options.recover && parserTask.getErrorCount() > errorCount0) { terminateTask(context.runtime); - throw ex = parserTask.getLastError(); + ex = parserTask.getLastError().toThrowable(); + throw ex; } return this; @@ -211,7 +204,7 @@ public Thread newThread(Runnable r) { futureTask = null; } - // SHARED for HtmlSaxPushParser + // SHARED for Html4SaxPushParser static void terminateExecution(final ExecutorService executor, final NokogiriBlockingQueueInputStream stream, final FutureTask futureTask) @@ -248,7 +241,7 @@ static class ParserTask extends ParserContext.ParserTask this(context, handler, parse(context.runtime, stream), stream); } - // IMPL with HtmlSaxPushParser + // IMPL with Html4SaxPushParser protected ParserTask(ThreadContext context, IRubyObject handler, XmlSaxParserContext parser, InputStream stream) { @@ -278,16 +271,15 @@ static class ParserTask extends ParserContext.ParserTask getErrorCount() { // check for null because thread may not have started yet - if (parser.getNokogiriHandler() == null) { return 0; } - return parser.getNokogiriHandler().getErrorCount(); + if (parser.getNokogiriErrorHandler() == null) { return 0; } + return parser.getNokogiriErrorHandler().getErrors().size(); } - synchronized final RaiseException + synchronized final RubyException getLastError() { - return parser.getNokogiriHandler().getLastError(); + List errors = parser.getNokogiriErrorHandler().getErrors(); + return errors.get(errors.size() - 1); } - } - } diff --git a/ext/java/nokogiri/XmlSchema.java b/ext/java/nokogiri/XmlSchema.java index 66cdff58d0..ff8e38fe70 100644 --- a/ext/java/nokogiri/XmlSchema.java +++ b/ext/java/nokogiri/XmlSchema.java @@ -276,7 +276,7 @@ private class SchemaResourceResolver implements LSResourceResolver String systemId, String baseURI) { - if (noNet && (systemId.startsWith("http://") || systemId.startsWith("ftp://"))) { + if (noNet && systemId != null && (systemId.startsWith("http://") || systemId.startsWith("ftp://"))) { if 
(systemId.startsWith(XMLConstants.W3C_XML_SCHEMA_NS_URI)) { return null; // use default resolver } diff --git a/ext/java/nokogiri/XmlSyntaxError.java b/ext/java/nokogiri/XmlSyntaxError.java index f87c7d2e18..2555d4ca42 100644 --- a/ext/java/nokogiri/XmlSyntaxError.java +++ b/ext/java/nokogiri/XmlSyntaxError.java @@ -63,7 +63,7 @@ public class XmlSyntaxError extends RubyException public static XmlSyntaxError createHTMLSyntaxError(final Ruby runtime) { - RubyClass klazz = (RubyClass) runtime.getClassFromPath("Nokogiri::HTML::SyntaxError"); + RubyClass klazz = (RubyClass) runtime.getClassFromPath("Nokogiri::HTML4::SyntaxError"); return new XmlSyntaxError(runtime, klazz); } diff --git a/ext/java/nokogiri/XmlXpathContext.java b/ext/java/nokogiri/XmlXpathContext.java index b65e1d9361..1f720c0806 100644 --- a/ext/java/nokogiri/XmlXpathContext.java +++ b/ext/java/nokogiri/XmlXpathContext.java @@ -203,15 +203,15 @@ public class XmlXpathContext extends RubyObject } switch (xobj.getType()) { - case XObject.CLASS_BOOLEAN : - return context.runtime.newBoolean(xobj.bool()); - case XObject.CLASS_NUMBER : - return context.runtime.newFloat(xobj.num()); - case XObject.CLASS_NODESET : - IRubyObject[] nodes = nodeListToRubyArray(context.runtime, xobj.nodelist()); - return XmlNodeSet.newNodeSet(context.runtime, nodes, this.context); - default : - return context.runtime.newString(xobj.str()); + case XObject.CLASS_BOOLEAN : + return context.runtime.newBoolean(xobj.bool()); + case XObject.CLASS_NUMBER : + return context.runtime.newFloat(xobj.num()); + case XObject.CLASS_NODESET : + IRubyObject[] nodes = nodeListToRubyArray(context.runtime, xobj.nodelist()); + return XmlNodeSet.newNodeSet(context.runtime, nodes, this.context); + default : + return context.runtime.newString(xobj.str()); } } diff --git a/ext/java/nokogiri/XsltStylesheet.java b/ext/java/nokogiri/XsltStylesheet.java index e56cdc1851..d5329f120b 100644 --- a/ext/java/nokogiri/XsltStylesheet.java +++ 
b/ext/java/nokogiri/XsltStylesheet.java @@ -213,12 +213,12 @@ public class XsltStylesheet extends RubyObject } switch (elistener.getErrorType()) { - case ERROR: - case FATAL: - throw runtime.newRuntimeError(elistener.getErrorMessage()); - case WARNING: - default: - // no-op + case ERROR: + case FATAL: + throw runtime.newRuntimeError(elistener.getErrorMessage()); + case WARNING: + default: + // no-op } if (stringResult == null) { @@ -286,7 +286,7 @@ public class XsltStylesheet extends RubyObject createDocumentFromDomResult(ThreadContext context, Ruby runtime, DOMResult domResult) { if ("html".equals(domResult.getNode().getFirstChild().getNodeName())) { - return new HtmlDocument(context.runtime, (Document) domResult.getNode()); + return new Html4Document(context.runtime, (Document) domResult.getNode()); } else { return new XmlDocument(context.runtime, (Document) domResult.getNode()); } @@ -322,7 +322,7 @@ public class XsltStylesheet extends RubyObject RubyClass parse_options = (RubyClass)runtime.getClassFromPath("Nokogiri::XML::ParseOptions"); if (htmlish) { args[3] = parse_options.getConstant("DEFAULT_HTML"); - RubyClass htmlDocumentClass = getNokogiriClass(runtime, "Nokogiri::HTML::Document"); + RubyClass htmlDocumentClass = getNokogiriClass(runtime, "Nokogiri::HTML4::Document"); return Helpers.invoke(context, htmlDocumentClass, "parse", args); } else { args[3] = parse_options.getConstant("DEFAULT_XML"); diff --git a/ext/java/nokogiri/internals/HtmlDomParserContext.java b/ext/java/nokogiri/internals/HtmlDomParserContext.java index 20200a929c..80c2f96ee9 100644 --- a/ext/java/nokogiri/internals/HtmlDomParserContext.java +++ b/ext/java/nokogiri/internals/HtmlDomParserContext.java @@ -4,7 +4,7 @@ import static nokogiri.internals.NokogiriHelpers.isNamespace; import static nokogiri.internals.NokogiriHelpers.stringOrNil; -import nokogiri.HtmlDocument; +import nokogiri.Html4Document; import nokogiri.NokogiriService; import nokogiri.XmlDocument; import 
nokogiri.XmlSyntaxError; @@ -28,7 +28,7 @@ import org.w3c.dom.NodeList; /** - * Parser for HtmlDocument. This class actually parses HtmlDocument using NekoHtml. + * Parser for Html4Document. This class actually parses Html4Document using NekoHtml. * * @author sergio * @author Patrick Mahoney @@ -115,12 +115,12 @@ public class HtmlDomParserContext extends XmlDomParserContext protected XmlDocument wrapDocument(ThreadContext context, RubyClass klass, Document document) { - HtmlDocument htmlDocument = new HtmlDocument(context.runtime, klass, document); + Html4Document htmlDocument = new Html4Document(context.runtime, klass, document); htmlDocument.setDocumentNode(context.runtime, document); Helpers.invoke(context, htmlDocument, "initialize"); if (ruby_encoding.isNil()) { - // ruby_encoding might have detected by HtmlDocument::EncodingReader + // ruby_encoding might have detected by Html4Document::EncodingReader if (detected_encoding != null && !detected_encoding.isNil()) { ruby_encoding = detected_encoding; } else { diff --git a/ext/java/nokogiri/internals/NokogiriEntityResolver.java b/ext/java/nokogiri/internals/NokogiriEntityResolver.java index 6ee5de87bd..eb9a8b5b3d 100644 --- a/ext/java/nokogiri/internals/NokogiriEntityResolver.java +++ b/ext/java/nokogiri/internals/NokogiriEntityResolver.java @@ -85,7 +85,7 @@ public class NokogiriEntityResolver implements EntityResolver2 private void addError(String errorMessage) { - if (handler != null) { handler.errors.add(new Exception(errorMessage)); } + if (handler != null) { handler.addError(new Exception(errorMessage)); } } /** diff --git a/ext/java/nokogiri/internals/NokogiriErrorHandler.java b/ext/java/nokogiri/internals/NokogiriErrorHandler.java index 51d8e05dae..9c4683ee48 100644 --- a/ext/java/nokogiri/internals/NokogiriErrorHandler.java +++ b/ext/java/nokogiri/internals/NokogiriErrorHandler.java @@ -1,11 +1,15 @@ package nokogiri.internals; -import java.util.ArrayList; -import java.util.List; - +import 
nokogiri.XmlSyntaxError; import org.apache.xerces.xni.parser.XMLErrorHandler; +import org.jruby.Ruby; +import org.jruby.RubyException; +import org.jruby.exceptions.RaiseException; import org.xml.sax.ErrorHandler; +import java.util.ArrayList; +import java.util.List; + /** * Super class of error handlers. * @@ -17,23 +21,40 @@ */ public abstract class NokogiriErrorHandler implements ErrorHandler, XMLErrorHandler { - protected final List errors; + private final Ruby runtime; + protected final List errors; protected boolean noerror; protected boolean nowarning; public - NokogiriErrorHandler(boolean noerror, boolean nowarning) + NokogiriErrorHandler(Ruby runtime, boolean noerror, boolean nowarning) { - this.errors = new ArrayList(4); + this.runtime = runtime; + this.errors = new ArrayList(4); this.noerror = noerror; this.nowarning = nowarning; } - List + public List getErrors() { return errors; } public void - addError(Exception ex) { errors.add(ex); } + addError(Exception ex) + { + addError(XmlSyntaxError.createXMLSyntaxError(runtime, ex)); + } + + public void + addError(RubyException ex) + { + errors.add(ex); + } + + public void + addError(RaiseException ex) + { + addError(ex.getException()); + } protected boolean usesNekoHtml(String domain) diff --git a/ext/java/nokogiri/internals/NokogiriHandler.java b/ext/java/nokogiri/internals/NokogiriHandler.java index 897a6dcf13..fdc57f4f88 100644 --- a/ext/java/nokogiri/internals/NokogiriHandler.java +++ b/ext/java/nokogiri/internals/NokogiriHandler.java @@ -38,28 +38,22 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler private final Ruby runtime; private final RubyClass attrClass; private final IRubyObject object; - - /** - * Stores parse errors with the most-recent error last. - * - * TODO: should these be stored in the document 'errors' array? - * Currently only string messages are stored there. 
- */ - private final LinkedList errors = new LinkedList(); + private NokogiriErrorHandler errorHandler; private Locator locator; private boolean needEmptyAttrCheck; public - NokogiriHandler(Ruby runtime, IRubyObject object) + NokogiriHandler(Ruby runtime, IRubyObject object, NokogiriErrorHandler errorHandler) { assert object != null; this.runtime = runtime; this.attrClass = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::SAX::Parser::Attribute"); this.object = object; + this.errorHandler = errorHandler; charactersBuilder = new StringBuilder(); String objectName = object.getMetaClass().getName(); - if ("Nokogiri::HTML::SAX::Parser".equals(objectName)) { needEmptyAttrCheck = true; } + if ("Nokogiri::HTML4::SAX::Parser".equals(objectName)) { needEmptyAttrCheck = true; } } @Override @@ -253,9 +247,9 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler try { final String msg = ex.getMessage(); call("error", runtime.newString(msg == null ? "" : msg)); - addError(XmlSyntaxError.createError(runtime, ex).toThrowable()); + errorHandler.addError(ex); } catch (RaiseException e) { - addError(e); + errorHandler.addError(e); throw e; } } @@ -282,22 +276,10 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler call("warning", runtime.newString(msg == null ? 
"" : msg)); } - protected synchronized void - addError(RaiseException e) - { - errors.add(e); - } - public synchronized int getErrorCount() { - return errors.size(); - } - - public synchronized RaiseException - getLastError() - { - return errors.getLast(); + return errorHandler.getErrors().size(); } private void diff --git a/ext/java/nokogiri/internals/NokogiriHelpers.java b/ext/java/nokogiri/internals/NokogiriHelpers.java index a211ffbff1..c506bba394 100644 --- a/ext/java/nokogiri/internals/NokogiriHelpers.java +++ b/ext/java/nokogiri/internals/NokogiriHelpers.java @@ -25,7 +25,7 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; -import nokogiri.HtmlDocument; +import nokogiri.Html4Document; import nokogiri.NokogiriService; import nokogiri.XmlAttr; import nokogiri.XmlCdata; @@ -89,7 +89,7 @@ public class NokogiriHelpers if (node == null) { return runtime.getNil(); } if (node.getNodeType() == Node.ATTRIBUTE_NODE && isNamespace(node.getNodeName())) { XmlDocument xmlDocument = (XmlDocument) node.getOwnerDocument().getUserData(CACHED_NODE); - if (!(xmlDocument instanceof HtmlDocument)) { + if (!(xmlDocument instanceof Html4Document)) { String prefix = getLocalNameForNamespace(((Attr) node).getName(), null); String href = ((Attr) node).getValue(); XmlNamespace xmlNamespace = xmlDocument.getNamespaceCache().get(prefix, href); @@ -116,59 +116,59 @@ public class NokogiriHelpers if (node == null) { return runtime.getNil(); } // this is slow; need a way to cache nokogiri classes/modules somewhere switch (node.getNodeType()) { - case Node.ELEMENT_NODE: - XmlElement xmlElement = (XmlElement) NokogiriService.XML_ELEMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, - "Nokogiri::XML::Element")); - xmlElement.setNode(runtime, node); - return xmlElement; - case Node.ATTRIBUTE_NODE: - XmlAttr xmlAttr = (XmlAttr) NokogiriService.XML_ATTR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, - "Nokogiri::XML::Attr")); - xmlAttr.setNode(runtime, node); - return xmlAttr; 
- case Node.TEXT_NODE: - XmlText xmlText = (XmlText) NokogiriService.XML_TEXT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, - "Nokogiri::XML::Text")); - xmlText.setNode(runtime, node); - return xmlText; - case Node.COMMENT_NODE: - XmlComment xmlComment = (XmlComment) NokogiriService.XML_COMMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, - "Nokogiri::XML::Comment")); - xmlComment.setNode(runtime, node); - return xmlComment; - case Node.ENTITY_NODE: - return new XmlNode(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityDecl"), node); - case Node.ENTITY_REFERENCE_NODE: - XmlEntityReference xmlEntityRef = (XmlEntityReference) NokogiriService.XML_ENTITY_REFERENCE_ALLOCATOR.allocate(runtime, - getNokogiriClass(runtime, "Nokogiri::XML::EntityReference")); - xmlEntityRef.setNode(runtime, node); - return xmlEntityRef; - case Node.PROCESSING_INSTRUCTION_NODE: - XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction) - NokogiriService.XML_PROCESSING_INSTRUCTION_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, - "Nokogiri::XML::ProcessingInstruction")); - xmlProcessingInstruction.setNode(runtime, node); - return xmlProcessingInstruction; - case Node.CDATA_SECTION_NODE: - XmlCdata xmlCdata = (XmlCdata) NokogiriService.XML_CDATA_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, - "Nokogiri::XML::CDATA")); - xmlCdata.setNode(runtime, node); - return xmlCdata; - case Node.DOCUMENT_NODE: - XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime, - getNokogiriClass(runtime, "Nokogiri::XML::Document")); - xmlDocument.setDocumentNode(runtime, (Document) node); - return xmlDocument; - case Node.DOCUMENT_TYPE_NODE: - XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, - "Nokogiri::XML::DTD")); - xmlDtd.setNode(runtime, node); - return xmlDtd; - default: - XmlNode xmlNode = (XmlNode) NokogiriService.XML_NODE_ALLOCATOR.allocate(runtime, 
getNokogiriClass(runtime, - "Nokogiri::XML::Node")); - xmlNode.setNode(runtime, node); - return xmlNode; + case Node.ELEMENT_NODE: + XmlElement xmlElement = (XmlElement) NokogiriService.XML_ELEMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::Element")); + xmlElement.setNode(runtime, node); + return xmlElement; + case Node.ATTRIBUTE_NODE: + XmlAttr xmlAttr = (XmlAttr) NokogiriService.XML_ATTR_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::Attr")); + xmlAttr.setNode(runtime, node); + return xmlAttr; + case Node.TEXT_NODE: + XmlText xmlText = (XmlText) NokogiriService.XML_TEXT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::Text")); + xmlText.setNode(runtime, node); + return xmlText; + case Node.COMMENT_NODE: + XmlComment xmlComment = (XmlComment) NokogiriService.XML_COMMENT_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::Comment")); + xmlComment.setNode(runtime, node); + return xmlComment; + case Node.ENTITY_NODE: + return new XmlNode(runtime, getNokogiriClass(runtime, "Nokogiri::XML::EntityDecl"), node); + case Node.ENTITY_REFERENCE_NODE: + XmlEntityReference xmlEntityRef = (XmlEntityReference) NokogiriService.XML_ENTITY_REFERENCE_ALLOCATOR.allocate(runtime, + getNokogiriClass(runtime, "Nokogiri::XML::EntityReference")); + xmlEntityRef.setNode(runtime, node); + return xmlEntityRef; + case Node.PROCESSING_INSTRUCTION_NODE: + XmlProcessingInstruction xmlProcessingInstruction = (XmlProcessingInstruction) + NokogiriService.XML_PROCESSING_INSTRUCTION_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::ProcessingInstruction")); + xmlProcessingInstruction.setNode(runtime, node); + return xmlProcessingInstruction; + case Node.CDATA_SECTION_NODE: + XmlCdata xmlCdata = (XmlCdata) NokogiriService.XML_CDATA_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::CDATA")); + xmlCdata.setNode(runtime, node); + return xmlCdata; + case 
Node.DOCUMENT_NODE: + XmlDocument xmlDocument = (XmlDocument) NokogiriService.XML_DOCUMENT_ALLOCATOR.allocate(runtime, + getNokogiriClass(runtime, "Nokogiri::XML::Document")); + xmlDocument.setDocumentNode(runtime, (Document) node); + return xmlDocument; + case Node.DOCUMENT_TYPE_NODE: + XmlDtd xmlDtd = (XmlDtd) NokogiriService.XML_DTD_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::DTD")); + xmlDtd.setNode(runtime, node); + return xmlDtd; + default: + XmlNode xmlNode = (XmlNode) NokogiriService.XML_NODE_ALLOCATOR.allocate(runtime, getNokogiriClass(runtime, + "Nokogiri::XML::Node")); + xmlNode.setNode(runtime, node); + return xmlNode; } } @@ -723,8 +723,8 @@ public class NokogiriHelpers public static CharSequence convertEncodingByNKFIfNecessary(ThreadContext context, XmlDocument doc, CharSequence str) { - if (!(doc instanceof HtmlDocument)) { return str; } - String parsed_encoding = ((HtmlDocument)doc).getPraedEncoding(); + if (!(doc instanceof Html4Document)) { return str; } + String parsed_encoding = ((Html4Document)doc).getPraedEncoding(); if (parsed_encoding == null) { return str; } String ruby_encoding = rubyStringToString(doc.getEncoding()); if (ruby_encoding == null) { return str; } diff --git a/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java b/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java index 1a9e5af490..bc907ddf95 100644 --- a/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +++ b/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java @@ -1,6 +1,7 @@ package nokogiri.internals; import org.apache.xerces.xni.parser.XMLParseException; +import org.jruby.Ruby; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; @@ -13,21 +14,21 @@ public class NokogiriNonStrictErrorHandler extends NokogiriErrorHandler { public - NokogiriNonStrictErrorHandler(boolean noerror, boolean nowarning) + NokogiriNonStrictErrorHandler(Ruby runtime, boolean noerror, boolean nowarning) { - 
super(noerror, nowarning); + super(runtime, noerror, nowarning); } public void warning(SAXParseException ex) throws SAXException { - errors.add(ex); + addError(ex); } public void error(SAXParseException ex) throws SAXException { - errors.add(ex); + addError(ex); } public void @@ -38,7 +39,7 @@ public class NokogiriNonStrictErrorHandler extends NokogiriErrorHandler // found in the prolog, instead it will keep calling this method and we'll // keep inserting the error in the document errors array until we run // out of memory - errors.add(ex); + addError(ex); String message = ex.getMessage(); // The problem with Xerces is that some errors will cause the @@ -53,19 +54,19 @@ public class NokogiriNonStrictErrorHandler extends NokogiriErrorHandler public void error(String domain, String key, XMLParseException e) { - errors.add(e); + addError(e); } public void fatalError(String domain, String key, XMLParseException e) { - errors.add(e); + addError(e); } public void warning(String domain, String key, XMLParseException e) { - errors.add(e); + addError(e); } /* diff --git a/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java b/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java index 286820a423..152ee4657c 100644 --- a/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +++ b/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java @@ -1,6 +1,7 @@ package nokogiri.internals; import org.apache.xerces.xni.parser.XMLParseException; +import org.jruby.Ruby; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; @@ -20,15 +21,15 @@ public class NokogiriNonStrictErrorHandler4NekoHtml extends NokogiriErrorHandler { public - NokogiriNonStrictErrorHandler4NekoHtml(boolean nowarning) + NokogiriNonStrictErrorHandler4NekoHtml(Ruby runtime, boolean nowarning) { - super(false, nowarning); + super(runtime, false, nowarning); } public - NokogiriNonStrictErrorHandler4NekoHtml(boolean noerror, boolean 
nowarning) + NokogiriNonStrictErrorHandler4NekoHtml(Ruby runtime, boolean noerror, boolean nowarning) { - super(noerror, nowarning); + super(runtime, noerror, nowarning); } public void @@ -40,13 +41,13 @@ public class NokogiriNonStrictErrorHandler4NekoHtml extends NokogiriErrorHandler public void error(SAXParseException ex) throws SAXException { - errors.add(ex); + addError(ex); } public void fatalError(SAXParseException ex) throws SAXException { - errors.add(ex); + addError(ex); } /** @@ -64,7 +65,7 @@ public class NokogiriNonStrictErrorHandler4NekoHtml extends NokogiriErrorHandler public void error(String domain, String key, XMLParseException e) { - errors.add(e); + addError(e); } /** @@ -82,7 +83,7 @@ public class NokogiriNonStrictErrorHandler4NekoHtml extends NokogiriErrorHandler public void fatalError(String domain, String key, XMLParseException e) { - errors.add(e); + addError(e); } /** @@ -100,7 +101,7 @@ public class NokogiriNonStrictErrorHandler4NekoHtml extends NokogiriErrorHandler public void warning(String domain, String key, XMLParseException e) { - errors.add(e); + addError(e); } } diff --git a/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java b/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java index e5566b5787..78118d1de7 100644 --- a/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +++ b/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java @@ -1,6 +1,7 @@ package nokogiri.internals; import org.apache.xerces.xni.parser.XMLParseException; +import org.jruby.Ruby; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; @@ -14,23 +15,23 @@ public class NokogiriStrictErrorHandler extends NokogiriErrorHandler { public - NokogiriStrictErrorHandler(boolean noerror, boolean nowarning) + NokogiriStrictErrorHandler(Ruby runtime, boolean noerror, boolean nowarning) { - super(noerror, nowarning); + super(runtime, noerror, nowarning); } public void warning(SAXParseException spex) throws SAXException { if 
(!nowarning) { throw spex; } - else { errors.add(spex); } + else { addError(spex); } } public void error(SAXParseException spex) throws SAXException { if (!noerror) { throw spex; } - else { errors.add(spex); } + else { addError(spex); } } public void @@ -43,7 +44,7 @@ public class NokogiriStrictErrorHandler extends NokogiriErrorHandler error(String domain, String key, XMLParseException e) throws XMLParseException { if (!noerror) { throw e; } - else { errors.add(e); } + else { addError(e); } } public void @@ -56,6 +57,6 @@ public class NokogiriStrictErrorHandler extends NokogiriErrorHandler warning(String domain, String key, XMLParseException e) throws XMLParseException { if (!nowarning) { throw e; } - if (!usesNekoHtml(domain)) { errors.add(e); } + else { addError(e); } } } diff --git a/ext/java/nokogiri/internals/ParserContext.java b/ext/java/nokogiri/internals/ParserContext.java index 6eda7be5e2..b8d393a520 100644 --- a/ext/java/nokogiri/internals/ParserContext.java +++ b/ext/java/nokogiri/internals/ParserContext.java @@ -58,6 +58,12 @@ public abstract class ParserContext extends RubyObject source = new InputSource(); ParserContext.setUrl(context, source, url); + Ruby ruby = context.getRuntime(); + + if (!(data.respondsTo("read"))) { + throw ruby.newTypeError("must respond to :read"); + } + source.setByteStream(new IOInputStream(data)); if (java_encoding != null) { source.setEncoding(java_encoding); @@ -73,7 +79,7 @@ public abstract class ParserContext extends RubyObject Ruby ruby = context.getRuntime(); if (!(data instanceof RubyString)) { - throw ruby.newArgumentError("must be kind_of String"); + throw ruby.newTypeError("must be kind_of String"); } RubyString stringData = (RubyString) data; diff --git a/ext/java/nokogiri/internals/ReaderNode.java b/ext/java/nokogiri/internals/ReaderNode.java index f671075db9..4dc17f62e4 100644 --- a/ext/java/nokogiri/internals/ReaderNode.java +++ b/ext/java/nokogiri/internals/ReaderNode.java @@ -112,9 +112,10 @@ public abstract 
class ReaderNode getAttributes(ThreadContext context) { final Ruby runtime = context.runtime; - if (attributeList == null) { return runtime.getNil(); } RubyHash hash = RubyHash.newHash(runtime); + if (attributeList == null) { return hash; } for (int i = 0; i < attributeList.length; i++) { + if (isNamespace(attributeList.names.get(i))) { continue; } IRubyObject k = stringOrBlank(runtime, attributeList.names.get(i)); IRubyObject v = stringOrBlank(runtime, attributeList.values.get(i)); hash.fastASetCheckString(runtime, k, v); // hash.op_aset(context, k, v) @@ -150,8 +151,8 @@ public abstract class ReaderNode getNamespaces(ThreadContext context) { final Ruby runtime = context.runtime; - if (namespaces == null) { return runtime.getNil(); } RubyHash hash = RubyHash.newHash(runtime); + if (namespaces == null) { return hash; } for (Map.Entry entry : namespaces.entrySet()) { IRubyObject k = stringOrBlank(runtime, entry.getKey()); IRubyObject v = stringOrBlank(runtime, entry.getValue()); diff --git a/ext/java/nokogiri/internals/SaveContextVisitor.java b/ext/java/nokogiri/internals/SaveContextVisitor.java index ced4630811..2ddb0e4279 100644 --- a/ext/java/nokogiri/internals/SaveContextVisitor.java +++ b/ext/java/nokogiri/internals/SaveContextVisitor.java @@ -296,30 +296,30 @@ public class SaveContextVisitor for (int i = 0; i < str.length(); i++) { char c; switch (c = str.charAt(i)) { - case '\n': - buffer.append(" "); - break; - case '\r': - buffer.append(" "); - break; - case '\t': - buffer.append(" "); - break; - case '"': - if (htmlDoc) { buffer.append("%22"); } - else { buffer.append("""); } - break; - case '<': - buffer.append("<"); - break; - case '>': - buffer.append(">"); - break; - case '&': - buffer.append("&"); - break; - default: - buffer.append(c); + case '\n': + buffer.append(" "); + break; + case '\r': + buffer.append(" "); + break; + case '\t': + buffer.append(" "); + break; + case '"': + if (htmlDoc) { buffer.append("%22"); } + else { buffer.append("""); } + 
break; + case '<': + buffer.append("<"); + break; + case '>': + buffer.append(">"); + break; + case '&': + buffer.append("&"); + break; + default: + buffer.append(c); } } @@ -673,6 +673,8 @@ public int compare(Attr attr0, Attr attr1) { if (!isEmpty(name) && noEmpty) { buffer.append("'); } + } else if (asXhtml && !isEmpty(name)) { + buffer.append("'); } if (needBreakInClosing(element)) { if (!containsText(element)) { indentation.pop(); } diff --git a/ext/java/nokogiri/internals/XmlDomParserContext.java b/ext/java/nokogiri/internals/XmlDomParserContext.java index b94d1ae1f9..3621f1f98f 100644 --- a/ext/java/nokogiri/internals/XmlDomParserContext.java +++ b/ext/java/nokogiri/internals/XmlDomParserContext.java @@ -1,30 +1,24 @@ package nokogiri.internals; -import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; -import static nokogiri.internals.NokogiriHelpers.isBlank; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - +import nokogiri.XmlDocument; +import nokogiri.XmlDtd; +import nokogiri.XmlSyntaxError; import org.apache.xerces.parsers.DOMParser; -import org.jruby.Ruby; -import org.jruby.RubyArray; -import org.jruby.RubyClass; -import org.jruby.RubyFixnum; +import org.jruby.*; import org.jruby.exceptions.RaiseException; -import org.jruby.runtime.ThreadContext; import org.jruby.runtime.Helpers; +import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; -import nokogiri.NokogiriService; -import nokogiri.XmlDocument; -import nokogiri.XmlDtd; -import nokogiri.XmlSyntaxError; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static nokogiri.internals.NokogiriHelpers.isBlank; /** * Parser class for XML DOM processing. 
This class actually parses XML document @@ -48,7 +42,6 @@ public class XmlDomParserContext extends ParserContext protected static final String FEATURE_NOT_EXPAND_ENTITY = "http://apache.org/xml/features/dom/create-entity-ref-nodes"; protected static final String FEATURE_VALIDATION = "http://xml.org/sax/features/validation"; - private static final String XINCLUDE_FEATURE_ID = "http://apache.org/xml/features/xinclude"; private static final String SECURITY_MANAGER = "http://apache.org/xml/properties/security-manager"; protected ParserContext.Options options; @@ -69,17 +62,17 @@ public class XmlDomParserContext extends ParserContext this.options = new ParserContext.Options(RubyFixnum.fix2long(options)); java_encoding = NokogiriHelpers.getValidEncodingOrNull(encoding); ruby_encoding = encoding; - initErrorHandler(); + initErrorHandler(runtime); initParser(runtime); } protected void - initErrorHandler() + initErrorHandler(Ruby runtime) { if (options.recover) { - errorHandler = new NokogiriNonStrictErrorHandler(options.noError, options.noWarning); + errorHandler = new NokogiriNonStrictErrorHandler(runtime, options.noError, options.noWarning); } else { - errorHandler = new NokogiriStrictErrorHandler(options.noError, options.noWarning); + errorHandler = new NokogiriStrictErrorHandler(runtime, options.noError, options.noWarning); } } @@ -161,12 +154,10 @@ public class XmlDomParserContext extends ParserContext mapErrors(ThreadContext context, NokogiriErrorHandler errorHandler) { final Ruby runtime = context.runtime; - final List errors = errorHandler.getErrors(); + final List errors = errorHandler.getErrors(); final IRubyObject[] errorsAry = new IRubyObject[errors.size()]; for (int i = 0; i < errors.size(); i++) { - XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(runtime); - xmlSyntaxError.setException(errors.get(i)); - errorsAry[i] = xmlSyntaxError; + errorsAry[i] = errors.get(i); } return runtime.newArrayNoCopy(errorsAry); } diff --git 
a/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java b/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java index 214c46a286..7a81f428f5 100644 --- a/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +++ b/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java @@ -202,75 +202,75 @@ public abstract class CanonicalizerBase extends CanonicalizerSpi do { switch (currentNode.getNodeType()) { - case Node.ENTITY_NODE : - case Node.NOTATION_NODE : - case Node.ATTRIBUTE_NODE : - // illegal node type during traversal - throw new CanonicalizationException("empty"); - - case Node.DOCUMENT_FRAGMENT_NODE : - case Node.DOCUMENT_NODE : - ns.outputNodePush(); - sibling = currentNode.getFirstChild(); - break; - - case Node.COMMENT_NODE : - if (includeComments) { - outputCommentToWriter((Comment) currentNode, writer, documentLevel); - } - break; - - case Node.PROCESSING_INSTRUCTION_NODE : - outputPItoWriter((ProcessingInstruction) currentNode, writer, documentLevel); - break; + case Node.ENTITY_NODE : + case Node.NOTATION_NODE : + case Node.ATTRIBUTE_NODE : + // illegal node type during traversal + throw new CanonicalizationException("empty"); + + case Node.DOCUMENT_FRAGMENT_NODE : + case Node.DOCUMENT_NODE : + ns.outputNodePush(); + sibling = currentNode.getFirstChild(); + break; - case Node.TEXT_NODE : - case Node.CDATA_SECTION_NODE : - outputTextToWriter(currentNode.getNodeValue(), writer); - break; + case Node.COMMENT_NODE : + if (includeComments) { + outputCommentToWriter((Comment) currentNode, writer, documentLevel); + } + break; - case Node.ELEMENT_NODE : - documentLevel = NODE_NOT_BEFORE_OR_AFTER_DOCUMENT_ELEMENT; - if (currentNode == excludeNode) { + case Node.PROCESSING_INSTRUCTION_NODE : + outputPItoWriter((ProcessingInstruction) currentNode, writer, documentLevel); break; - } - if (filter != null && !filter.includeNodes(currentNode, parentNode)) { + + case Node.TEXT_NODE : + case Node.CDATA_SECTION_NODE : + outputTextToWriter(currentNode.getNodeValue(), 
writer); break; - } - Element currentElement = (Element)currentNode; - //Add a level to the nssymbtable. So latter can be pop-back. - ns.outputNodePush(); - writer.write('<'); - String name = currentElement.getTagName(); - UtfHelpper.writeByte(name, writer, cache); - - Iterator attrs = this.handleAttributesSubtree(currentElement, ns); - if (attrs != null) { - //we output all Attrs which are available - while (attrs.hasNext()) { - Attr attr = attrs.next(); - outputAttrToWriter(attr.getNodeName(), attr.getNodeValue(), writer, cache); + case Node.ELEMENT_NODE : + documentLevel = NODE_NOT_BEFORE_OR_AFTER_DOCUMENT_ELEMENT; + if (currentNode == excludeNode) { + break; + } + if (filter != null && !filter.includeNodes(currentNode, parentNode)) { + break; + } + + Element currentElement = (Element)currentNode; + //Add a level to the nssymbtable. So latter can be pop-back. + ns.outputNodePush(); + writer.write('<'); + String name = currentElement.getTagName(); + UtfHelpper.writeByte(name, writer, cache); + + Iterator attrs = this.handleAttributesSubtree(currentElement, ns); + if (attrs != null) { + //we output all Attrs which are available + while (attrs.hasNext()) { + Attr attr = attrs.next(); + outputAttrToWriter(attr.getNodeName(), attr.getNodeValue(), writer, cache); + } } - } - writer.write('>'); - sibling = currentNode.getFirstChild(); - if (sibling == null) { - writer.write(END_TAG); - UtfHelpper.writeStringToUtf8(name, writer); writer.write('>'); - //We finished with this level, pop to the previous definitions. - ns.outputNodePop(); - if (parentNode != null) { - sibling = currentNode.getNextSibling(); + sibling = currentNode.getFirstChild(); + if (sibling == null) { + writer.write(END_TAG); + UtfHelpper.writeStringToUtf8(name, writer); + writer.write('>'); + //We finished with this level, pop to the previous definitions. 
+ ns.outputNodePop(); + if (parentNode != null) { + sibling = currentNode.getNextSibling(); + } + } else { + parentNode = currentElement; } - } else { - parentNode = currentElement; - } - break; - case Node.DOCUMENT_TYPE_NODE : - default : - break; + break; + case Node.DOCUMENT_TYPE_NODE : + default : + break; } while (sibling == null && parentNode != null) { writer.write(END_TAG); @@ -477,37 +477,37 @@ abstract Iterator handleAttributesSubtree(Element element, NameSpaceSymbTa switch (c) { - case '&' : - toWrite = AMP; - break; + case '&' : + toWrite = AMP; + break; - case '<' : - toWrite = LT; - break; + case '<' : + toWrite = LT; + break; - case '"' : - toWrite = QUOT; - break; + case '"' : + toWrite = QUOT; + break; - case 0x09 : // '\t' - toWrite = X9; - break; + case 0x09 : // '\t' + toWrite = X9; + break; - case 0x0A : // '\n' - toWrite = XA; - break; + case 0x0A : // '\n' + toWrite = XA; + break; - case 0x0D : // '\r' - toWrite = XD; - break; + case 0x0D : // '\r' + toWrite = XD; + break; - default : - if (c < 0x80) { - writer.write(c); - } else { - UtfHelpper.writeCharToUtf8(c, writer); - } - continue; + default : + if (c < 0x80) { + writer.write(c); + } else { + UtfHelpper.writeCharToUtf8(c, writer); + } + continue; } writer.write(toWrite); } @@ -629,29 +629,29 @@ abstract Iterator handleAttributesSubtree(Element element, NameSpaceSymbTa switch (c) { - case '&' : - toWrite = AMP; - break; + case '&' : + toWrite = AMP; + break; - case '<' : - toWrite = LT; - break; + case '<' : + toWrite = LT; + break; - case '>' : - toWrite = GT; - break; + case '>' : + toWrite = GT; + break; - case 0xD : - toWrite = XD; - break; + case 0xD : + toWrite = XD; + break; - default : - if (c < 0x80) { - writer.write(c); - } else { - UtfHelpper.writeCharToUtf8(c, writer); - } - continue; + default : + if (c < 0x80) { + writer.write(c); + } else { + UtfHelpper.writeCharToUtf8(c, writer); + } + continue; } writer.write(toWrite); } diff --git 
a/ext/java/nokogiri/internals/c14n/XMLUtils.java b/ext/java/nokogiri/internals/c14n/XMLUtils.java index 2a9190f8c2..c29de56601 100644 --- a/ext/java/nokogiri/internals/c14n/XMLUtils.java +++ b/ext/java/nokogiri/internals/c14n/XMLUtils.java @@ -195,43 +195,43 @@ public class XMLUtils final String namespaceNs = Constants.NamespaceSpecNS; do { switch (node.getNodeType()) { - case Node.ELEMENT_NODE : - Element element = (Element) node; - if (!element.hasChildNodes()) { - break; - } - if (element.hasAttributes()) { - NamedNodeMap attributes = element.getAttributes(); - int attributesLength = attributes.getLength(); - - for (Node child = element.getFirstChild(); child != null; - child = child.getNextSibling()) { + case Node.ELEMENT_NODE : + Element element = (Element) node; + if (!element.hasChildNodes()) { + break; + } + if (element.hasAttributes()) { + NamedNodeMap attributes = element.getAttributes(); + int attributesLength = attributes.getLength(); - if (child.getNodeType() != Node.ELEMENT_NODE) { - continue; - } - Element childElement = (Element) child; + for (Node child = element.getFirstChild(); child != null; + child = child.getNextSibling()) { - for (int i = 0; i < attributesLength; i++) { - Attr currentAttr = (Attr) attributes.item(i); - if (!namespaceNs.equals(currentAttr.getNamespaceURI())) { + if (child.getNodeType() != Node.ELEMENT_NODE) { continue; } - if (childElement.hasAttributeNS(namespaceNs, - currentAttr.getLocalName())) { - continue; + Element childElement = (Element) child; + + for (int i = 0; i < attributesLength; i++) { + Attr currentAttr = (Attr) attributes.item(i); + if (!namespaceNs.equals(currentAttr.getNamespaceURI())) { + continue; + } + if (childElement.hasAttributeNS(namespaceNs, + currentAttr.getLocalName())) { + continue; + } + childElement.setAttributeNS(namespaceNs, + currentAttr.getName(), + currentAttr.getNodeValue()); } - childElement.setAttributeNS(namespaceNs, - currentAttr.getName(), - currentAttr.getNodeValue()); } } - } - case 
Node.ENTITY_REFERENCE_NODE : - case Node.DOCUMENT_NODE : - parent = node; - sibling = node.getFirstChild(); - break; + case Node.ENTITY_REFERENCE_NODE : + case Node.DOCUMENT_NODE : + parent = node; + sibling = node.getFirstChild(); + break; } while ((sibling == null) && (parent != null)) { sibling = parent.getNextSibling(); diff --git a/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java b/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java index 36b17023dc..652caaab5d 100644 --- a/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +++ b/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java @@ -885,26 +885,26 @@ else if (PROCESSING_INSTRUCTION_NODE == nexttype) { { switch (node.getNodeType()) { - case Node.DOCUMENT_FRAGMENT_NODE : - case Node.DOCUMENT_NODE : - case Node.ELEMENT_NODE : { - for (Node child = node.getFirstChild(); null != child; - child = child.getNextSibling()) { - getNodeData(child, buf); + case Node.DOCUMENT_FRAGMENT_NODE : + case Node.DOCUMENT_NODE : + case Node.ELEMENT_NODE : { + for (Node child = node.getFirstChild(); null != child; + child = child.getNextSibling()) { + getNodeData(child, buf); + } } - } - break; - case Node.TEXT_NODE : - case Node.CDATA_SECTION_NODE : - case Node.ATTRIBUTE_NODE : // Never a child but might be our starting node - buf.append(node.getNodeValue()); - break; - case Node.PROCESSING_INSTRUCTION_NODE : - // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING); - break; - default : - // ignore break; + case Node.TEXT_NODE : + case Node.CDATA_SECTION_NODE : + case Node.ATTRIBUTE_NODE : // Never a child but might be our starting node + buf.append(node.getNodeValue()); + break; + case Node.PROCESSING_INSTRUCTION_NODE : + // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING); + break; + default : + // ignore + break; } } @@ -943,30 +943,30 @@ else if (PROCESSING_INSTRUCTION_NODE == nexttype) { short type = getNodeType(nodeHandle); switch (type) { - case DTM.NAMESPACE_NODE : { - Node node = getNode(nodeHandle); - - // assume not 
null. - name = node.getNodeName(); - if (name.startsWith("xmlns:")) { - name = QName.getLocalPart(name); - } else if (name.equals("xmlns")) { - name = ""; + case DTM.NAMESPACE_NODE : { + Node node = getNode(nodeHandle); + + // assume not null. + name = node.getNodeName(); + if (name.startsWith("xmlns:")) { + name = QName.getLocalPart(name); + } else if (name.equals("xmlns")) { + name = ""; + } } - } - break; - case DTM.ATTRIBUTE_NODE : - case DTM.ELEMENT_NODE : - case DTM.ENTITY_REFERENCE_NODE : - case DTM.PROCESSING_INSTRUCTION_NODE : { - Node node = getNode(nodeHandle); - - // assume not null. - name = node.getNodeName(); - } - break; - default : - name = ""; + break; + case DTM.ATTRIBUTE_NODE : + case DTM.ELEMENT_NODE : + case DTM.ENTITY_REFERENCE_NODE : + case DTM.PROCESSING_INSTRUCTION_NODE : { + Node node = getNode(nodeHandle); + + // assume not null. + name = node.getNodeName(); + } + break; + default : + name = ""; } return name; @@ -1056,29 +1056,29 @@ else if (PROCESSING_INSTRUCTION_NODE == nexttype) { short type = getNodeType(nodeHandle); switch (type) { - case DTM.NAMESPACE_NODE : { - Node node = getNode(nodeHandle); + case DTM.NAMESPACE_NODE : { + Node node = getNode(nodeHandle); - // assume not null. - String qname = node.getNodeName(); - int index = qname.indexOf(':'); + // assume not null. + String qname = node.getNodeName(); + int index = qname.indexOf(':'); - prefix = (index < 0) ? "" : qname.substring(index + 1); - } - break; - case DTM.ATTRIBUTE_NODE : - case DTM.ELEMENT_NODE : { - Node node = getNode(nodeHandle); + prefix = (index < 0) ? "" : qname.substring(index + 1); + } + break; + case DTM.ATTRIBUTE_NODE : + case DTM.ELEMENT_NODE : { + Node node = getNode(nodeHandle); - // assume not null. - String qname = node.getNodeName(); - int index = qname.indexOf(':'); + // assume not null. + String qname = node.getNodeName(); + int index = qname.indexOf(':'); - prefix = (index < 0) ? 
"" : qname.substring(0, index); - } - break; - default : - prefix = ""; + prefix = (index < 0) ? "" : qname.substring(0, index); + } + break; + default : + prefix = ""; } return prefix; @@ -1616,38 +1616,38 @@ else if (PROCESSING_INSTRUCTION_NODE == nexttype) { { switch (node.getNodeType()) { - case Node.DOCUMENT_FRAGMENT_NODE : - case Node.DOCUMENT_NODE : - case Node.ELEMENT_NODE : { - for (Node child = node.getFirstChild(); null != child; - child = child.getNextSibling()) { - dispatchNodeData(child, ch, depth + 1); - } - } - break; - case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW% - case Node.COMMENT_NODE : - if (0 != depth) { - break; - } - // NOTE: Because this operation works in the DOM space, it does _not_ attempt - // to perform Text Coalition. That should only be done in DTM space. - case Node.TEXT_NODE : - case Node.CDATA_SECTION_NODE : - case Node.ATTRIBUTE_NODE : - String str = node.getNodeValue(); - if (ch instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler) { - ((org.apache.xml.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler)ch).characters(node); - } else { - ch.characters(str.toCharArray(), 0, str.length()); + case Node.DOCUMENT_FRAGMENT_NODE : + case Node.DOCUMENT_NODE : + case Node.ELEMENT_NODE : { + for (Node child = node.getFirstChild(); null != child; + child = child.getNextSibling()) { + dispatchNodeData(child, ch, depth + 1); + } } break; + case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW% + case Node.COMMENT_NODE : + if (0 != depth) { + break; + } + // NOTE: Because this operation works in the DOM space, it does _not_ attempt + // to perform Text Coalition. That should only be done in DTM space. 
+ case Node.TEXT_NODE : + case Node.CDATA_SECTION_NODE : + case Node.ATTRIBUTE_NODE : + String str = node.getNodeValue(); + if (ch instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler) { + ((org.apache.xml.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler)ch).characters(node); + } else { + ch.characters(str.toCharArray(), 0, str.length()); + } + break; // /* case Node.PROCESSING_INSTRUCTION_NODE : // // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING); // break; */ - default : - // ignore - break; + default : + // ignore + break; } } diff --git a/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java b/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java index a2a720ab20..232deb653d 100644 --- a/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +++ b/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java @@ -354,81 +354,81 @@ else if (!getBaseURI().equals(((NodeImpl) arg).getBaseURI())) { { short type = this.getNodeType(); switch (type) { - case Node.ELEMENT_NODE : { - - String namespace = this.getNamespaceURI(); - String prefix = this.getPrefix(); - if (namespace != null) { - // REVISIT: is it possible that prefix is empty string? - if (specifiedPrefix == null && prefix == specifiedPrefix) { - // looking for default namespace - return namespace; - } else if (prefix != null && prefix.equals(specifiedPrefix)) { - // non default namespace - return namespace; + case Node.ELEMENT_NODE : { + + String namespace = this.getNamespaceURI(); + String prefix = this.getPrefix(); + if (namespace != null) { + // REVISIT: is it possible that prefix is empty string? 
+ if (specifiedPrefix == null && prefix == specifiedPrefix) { + // looking for default namespace + return namespace; + } else if (prefix != null && prefix.equals(specifiedPrefix)) { + // non default namespace + return namespace; + } } - } - if (this.hasAttributes()) { - NamedNodeMap map = this.getAttributes(); - int length = map.getLength(); - for (int i = 0; i < length; i++) { - Node attr = map.item(i); - String attrPrefix = attr.getPrefix(); - String value = attr.getNodeValue(); - namespace = attr.getNamespaceURI(); - if (namespace != null && namespace.equals("http://www.w3.org/2000/xmlns/")) { - // at this point we are dealing with DOM Level 2 nodes only - if (specifiedPrefix == null && - attr.getNodeName().equals("xmlns")) { - // default namespace - return value; - } else if (attrPrefix != null && - attrPrefix.equals("xmlns") && - attr.getLocalName().equals(specifiedPrefix)) { - // non default namespace - return value; + if (this.hasAttributes()) { + NamedNodeMap map = this.getAttributes(); + int length = map.getLength(); + for (int i = 0; i < length; i++) { + Node attr = map.item(i); + String attrPrefix = attr.getPrefix(); + String value = attr.getNodeValue(); + namespace = attr.getNamespaceURI(); + if (namespace != null && namespace.equals("http://www.w3.org/2000/xmlns/")) { + // at this point we are dealing with DOM Level 2 nodes only + if (specifiedPrefix == null && + attr.getNodeName().equals("xmlns")) { + // default namespace + return value; + } else if (attrPrefix != null && + attrPrefix.equals("xmlns") && + attr.getLocalName().equals(specifiedPrefix)) { + // non default namespace + return value; + } } } } - } - /* - NodeImpl ancestor = (NodeImpl)getElementAncestor(this); - if (ancestor != null) { - return ancestor.lookupNamespaceURI(specifiedPrefix); - } - */ + /* + NodeImpl ancestor = (NodeImpl)getElementAncestor(this); + if (ancestor != null) { + return ancestor.lookupNamespaceURI(specifiedPrefix); + } + */ - return null; + return null; - } - /* - 
case Node.DOCUMENT_NODE : { - return((NodeImpl)((Document)this).getDocumentElement()).lookupNamespaceURI(specifiedPrefix) ; - } - */ - case Node.ENTITY_NODE : - case Node.NOTATION_NODE: - case Node.DOCUMENT_FRAGMENT_NODE: - case Node.DOCUMENT_TYPE_NODE: - // type is unknown - return null; - case Node.ATTRIBUTE_NODE: { - if (this.getOwnerElement().getNodeType() == Node.ELEMENT_NODE) { - return getOwnerElement().lookupNamespaceURI(specifiedPrefix); - } - return null; - } - default: { /* - NodeImpl ancestor = (NodeImpl)getElementAncestor(this); - if (ancestor != null) { - return ancestor.lookupNamespaceURI(specifiedPrefix); - } - */ - return null; - } + case Node.DOCUMENT_NODE : { + return((NodeImpl)((Document)this).getDocumentElement()).lookupNamespaceURI(specifiedPrefix) ; + } + */ + case Node.ENTITY_NODE : + case Node.NOTATION_NODE: + case Node.DOCUMENT_FRAGMENT_NODE: + case Node.DOCUMENT_TYPE_NODE: + // type is unknown + return null; + case Node.ATTRIBUTE_NODE: { + if (this.getOwnerElement().getNodeType() == Node.ELEMENT_NODE) { + return getOwnerElement().lookupNamespaceURI(specifiedPrefix); + + } + return null; + } + default: { + /* + NodeImpl ancestor = (NodeImpl)getElementAncestor(this); + if (ancestor != null) { + return ancestor.lookupNamespaceURI(specifiedPrefix); + } + */ + return null; + } } } @@ -531,39 +531,39 @@ else if (!getBaseURI().equals(((NodeImpl) arg).getBaseURI())) { short type = this.getNodeType(); switch (type) { - /* - case Node.ELEMENT_NODE: { + /* + case Node.ELEMENT_NODE: { - String namespace = this.getNamespaceURI(); // to flip out children - return lookupNamespacePrefix(namespaceURI, (ElementImpl)this); - } + String namespace = this.getNamespaceURI(); // to flip out children + return lookupNamespacePrefix(namespaceURI, (ElementImpl)this); + } - case Node.DOCUMENT_NODE:{ - return((NodeImpl)((Document)this).getDocumentElement()).lookupPrefix(namespaceURI); - } - */ - case Node.ENTITY_NODE : - case Node.NOTATION_NODE: - case 
Node.DOCUMENT_FRAGMENT_NODE: - case Node.DOCUMENT_TYPE_NODE: - // type is unknown - return null; - case Node.ATTRIBUTE_NODE: { - if (this.getOwnerElement().getNodeType() == Node.ELEMENT_NODE) { - return getOwnerElement().lookupPrefix(namespaceURI); + case Node.DOCUMENT_NODE:{ + return((NodeImpl)((Document)this).getDocumentElement()).lookupPrefix(namespaceURI); + } + */ + case Node.ENTITY_NODE : + case Node.NOTATION_NODE: + case Node.DOCUMENT_FRAGMENT_NODE: + case Node.DOCUMENT_TYPE_NODE: + // type is unknown + return null; + case Node.ATTRIBUTE_NODE: { + if (this.getOwnerElement().getNodeType() == Node.ELEMENT_NODE) { + return getOwnerElement().lookupPrefix(namespaceURI); + } + return null; + } + default: { + /* + NodeImpl ancestor = (NodeImpl)getElementAncestor(this); + if (ancestor != null) { + return ancestor.lookupPrefix(namespaceURI); + } + */ + return null; } - return null; - } - default: { - /* - NodeImpl ancestor = (NodeImpl)getElementAncestor(this); - if (ancestor != null) { - return ancestor.lookupPrefix(namespaceURI); - } - */ - return null; - } } } diff --git a/ext/nokogiri/depend b/ext/nokogiri/depend index 61659f7864..24f5908865 100644 --- a/ext/nokogiri/depend +++ b/ext/nokogiri/depend @@ -1,37 +1,38 @@ # -*-makefile-*- # DO NOT DELETE -html_document.o: ./nokogiri.h -html_element_description.o: ./nokogiri.h -html_entity_lookup.o: ./nokogiri.h -html_sax_parser_context.o: ./nokogiri.h -html_sax_push_parser.o: ./nokogiri.h -libxml2_backwards_compat.o: ./nokogiri.h -nokogiri.o: ./nokogiri.h -test_global_handlers.o: ./nokogiri.h -xml_attr.o: ./nokogiri.h -xml_attribute_decl.o: ./nokogiri.h -xml_cdata.o: ./nokogiri.h -xml_comment.o: ./nokogiri.h -xml_document.o: ./nokogiri.h -xml_document_fragment.o: ./nokogiri.h -xml_dtd.o: ./nokogiri.h -xml_element_content.o: ./nokogiri.h -xml_element_decl.o: ./nokogiri.h -xml_encoding_handler.o: ./nokogiri.h -xml_entity_decl.o: ./nokogiri.h -xml_entity_reference.o: ./nokogiri.h -xml_namespace.o: ./nokogiri.h 
-xml_node.o: ./nokogiri.h -xml_node_set.o: ./nokogiri.h -xml_processing_instruction.o: ./nokogiri.h -xml_reader.o: ./nokogiri.h -xml_relax_ng.o: ./nokogiri.h -xml_sax_parser.o: ./nokogiri.h -xml_sax_parser_context.o: ./nokogiri.h -xml_sax_push_parser.o: ./nokogiri.h -xml_schema.o: ./nokogiri.h -xml_syntax_error.o: ./nokogiri.h -xml_text.o: ./nokogiri.h -xml_xpath_context.o: ./nokogiri.h -xslt_stylesheet.o: ./nokogiri.h +gumbo.o: $(srcdir)/nokogiri.h +html_document.o: $(srcdir)/nokogiri.h +html_element_description.o: $(srcdir)/nokogiri.h +html_entity_lookup.o: $(srcdir)/nokogiri.h +html_sax_parser_context.o: $(srcdir)/nokogiri.h +html_sax_push_parser.o: $(srcdir)/nokogiri.h +libxml2_backwards_compat.o: $(srcdir)/nokogiri.h +nokogiri.o: $(srcdir)/nokogiri.h +test_global_handlers.o: $(srcdir)/nokogiri.h +xml_attr.o: $(srcdir)/nokogiri.h +xml_attribute_decl.o: $(srcdir)/nokogiri.h +xml_cdata.o: $(srcdir)/nokogiri.h +xml_comment.o: $(srcdir)/nokogiri.h +xml_document.o: $(srcdir)/nokogiri.h +xml_document_fragment.o: $(srcdir)/nokogiri.h +xml_dtd.o: $(srcdir)/nokogiri.h +xml_element_content.o: $(srcdir)/nokogiri.h +xml_element_decl.o: $(srcdir)/nokogiri.h +xml_encoding_handler.o: $(srcdir)/nokogiri.h +xml_entity_decl.o: $(srcdir)/nokogiri.h +xml_entity_reference.o: $(srcdir)/nokogiri.h +xml_namespace.o: $(srcdir)/nokogiri.h +xml_node.o: $(srcdir)/nokogiri.h +xml_node_set.o: $(srcdir)/nokogiri.h +xml_processing_instruction.o: $(srcdir)/nokogiri.h +xml_reader.o: $(srcdir)/nokogiri.h +xml_relax_ng.o: $(srcdir)/nokogiri.h +xml_sax_parser.o: $(srcdir)/nokogiri.h +xml_sax_parser_context.o: $(srcdir)/nokogiri.h +xml_sax_push_parser.o: $(srcdir)/nokogiri.h +xml_schema.o: $(srcdir)/nokogiri.h +xml_syntax_error.o: $(srcdir)/nokogiri.h +xml_text.o: $(srcdir)/nokogiri.h +xml_xpath_context.o: $(srcdir)/nokogiri.h +xslt_stylesheet.o: $(srcdir)/nokogiri.h diff --git a/ext/nokogiri/extconf.rb b/ext/nokogiri/extconf.rb index b08c3ced1f..3000148418 100644 --- a/ext/nokogiri/extconf.rb +++ 
b/ext/nokogiri/extconf.rb @@ -1,5 +1,8 @@ # frozen_string_literal: true -ENV['RC_ARCHS'] = '' if RUBY_PLATFORM =~ /darwin/ + +# rubocop:disable Style/GlobalVars + +ENV["RC_ARCHS"] = "" if RUBY_PLATFORM.include?("darwin") require "mkmf" require "rbconfig" @@ -8,20 +11,18 @@ require "pathname" # helpful constants -PACKAGE_ROOT_DIR = File.expand_path(File.join(File.dirname(__FILE__), '..', '..')) +PACKAGE_ROOT_DIR = File.expand_path(File.join(File.dirname(__FILE__), "..", "..")) REQUIRED_LIBXML_VERSION = "2.6.21" RECOMMENDED_LIBXML_VERSION = "2.9.3" -# The gem version constraint in the Rakefile is not respected at install time. -# Keep this version in sync with the one in the Rakefile ! -REQUIRED_MINI_PORTILE_VERSION = "~> 2.5.0" +REQUIRED_MINI_PORTILE_VERSION = "~> 2.8.0" # keep this version in sync with the one in the gemspec REQUIRED_PKG_CONFIG_VERSION = "~> 1.1" # Keep track of what versions of what libraries we build against OTHER_LIBRARY_VERSIONS = {} NOKOGIRI_HELP_MESSAGE = <<~HELP - USAGE: ruby #{$0} [options] + USAGE: ruby #{$PROGRAM_NAME} [options] Flags that are always valid: @@ -91,6 +92,9 @@ --with-xml2-include=DIRECTORY Look for xml2 headers in DIRECTORY. + --with-xml2-source-dir=DIRECTORY + (dev only) Build libxml2 from the source code in DIRECTORY + Related to libxslt: @@ -103,6 +107,9 @@ --with-xslt-include=DIRECTORY Look for xslt headers in DIRECTORY. + --with-xslt-source-dir=DIRECTORY + (dev only) Build libxslt from the source code in DIRECTORY + Related to libexslt: @@ -150,7 +157,7 @@ # utility functions # def config_clean? - enable_config('clean', true) + enable_config("clean", true) end def config_static? @@ -164,28 +171,28 @@ def config_cross_build? def config_system_libraries? enable_config("system-libraries", ENV.key?("NOKOGIRI_USE_SYSTEM_LIBRARIES")) do |_, default| - arg_config('--use-system-libraries', default) + arg_config("--use-system-libraries", default) end end def windows? 
- RbConfig::CONFIG['target_os'] =~ /mingw32|mswin/ + RbConfig::CONFIG["target_os"].match?(/mingw|mswin/) end def solaris? - RbConfig::CONFIG['target_os'] =~ /solaris/ + RbConfig::CONFIG["target_os"].include?("solaris") end def darwin? - RbConfig::CONFIG['target_os'] =~ /darwin/ + RbConfig::CONFIG["target_os"].include?("darwin") end def openbsd? - RbConfig::CONFIG['target_os'] =~ /openbsd/ + RbConfig::CONFIG["target_os"].include?("openbsd") end def aix? - RbConfig::CONFIG["target_os"] =~ /aix/ + RbConfig::CONFIG["target_os"].include?("aix") end def nix? @@ -193,7 +200,7 @@ def nix? end def truffle? - ::RUBY_ENGINE == 'truffleruby' + ::RUBY_ENGINE == "truffleruby" end def concat_flags(*args) @@ -204,6 +211,18 @@ def local_have_library(lib, func = nil, headers = nil) have_library(lib, func, headers) || have_library("lib#{lib}", func, headers) end +def gnome_source + # As of 2022-02-20, some mirrors have expired SSL certificates. I'm able to retrieve from my home, + # but whatever host is resolved on the github actions workers see an expired cert. + # + # See https://github.com/sparklemotion/nokogiri/runs/5266206403?check_suite_focus=true + if ENV["NOKOGIRI_USE_CANONICAL_GNOME_SOURCE"] + "https://download.gnome.org" + else + "https://mirror.csclub.uwaterloo.ca/gnome" # old reliable + end +end + LOCAL_PACKAGE_RESPONSE = Object.new def LOCAL_PACKAGE_RESPONSE.%(package) package ? 
"yes: #{package}" : "no" @@ -222,9 +241,9 @@ def try_package_configuration(pc) # let's fall back to the pkg-config gem, which knows how to parse .pc files, and wrap it with the # same logic as MakeMakefile#pkg_config begin - require 'rubygems' - gem('pkg-config', REQUIRED_PKG_CONFIG_VERSION) - require 'pkg-config' + require "rubygems" + gem("pkg-config", REQUIRED_PKG_CONFIG_VERSION) + require "pkg-config" checking_for("#{pc} using pkg-config gem version #{PKGConfig::VERSION}", LOCAL_PACKAGE_RESPONSE) do if PKGConfig.have_package(pc) @@ -278,17 +297,16 @@ def preserving_globals end def abort_could_not_find_library(lib) - abort("-----\n#{caller[0]}\n#{lib} is missing. Please locate mkmf.log to investigate how it is failing.\n-----") + callers = caller(1..2).join("\n") + abort("-----\n#{callers}\n#{lib} is missing. Please locate mkmf.log to investigate how it is failing.\n-----") end -def chdir_for_build +def chdir_for_build(&block) # When using rake-compiler-dock on Windows, the underlying Virtualbox shared # folders don't support symlinks, but libiconv expects it for a build on # Linux. We work around this limitation by using the temp dir for cooking. - build_dir = ENV['RCD_HOST_RUBY_PLATFORM'].to_s =~ /mingw|mswin|cygwin/ ? '/tmp' : '.' - Dir.chdir(build_dir) do - yield - end + build_dir = /mingw|mswin|cygwin/.match?(ENV["RCD_HOST_RUBY_PLATFORM"].to_s) ? "/tmp" : "." + Dir.chdir(build_dir, &block) end def sh_export_path(path) @@ -339,8 +357,8 @@ def have_libxml_headers?(version = nil) end def try_link_iconv(using = nil) - checking_for(using ? "iconv using #{using}" : 'iconv') do - ['', '-liconv'].any? do |opt| + checking_for(using ? "iconv using #{using}" : "iconv") do + ["", "-liconv"].any? do |opt| preserving_globals do yield if block_given? @@ -364,6 +382,7 @@ def iconv_configure_flags ["iconv", "opt"].each do |target| config = preserving_globals { dir_config(target) } next unless config.any? 
&& try_link_iconv("--with-#{target}-* flags") { dir_config(target) } + idirs, ldirs = config.map do |dirs| Array(dirs).flat_map do |dir| dir.split(File::PATH_SEPARATOR) @@ -371,22 +390,22 @@ def iconv_configure_flags end return [ - '--with-iconv=yes', - *("CPPFLAGS=#{idirs.map { |dir| '-I' + dir }.join(' ')}" if idirs), - *("LDFLAGS=#{ldirs.map { |dir| '-L' + dir }.join(' ')}" if ldirs), + "--with-iconv=yes", + *("CPPFLAGS=#{idirs.map { |dir| "-I" + dir }.join(" ")}" if idirs), + *("LDFLAGS=#{ldirs.map { |dir| "-L" + dir }.join(" ")}" if ldirs), ] end if try_link_iconv - return ['--with-iconv=yes'] + return ["--with-iconv=yes"] end - config = preserving_globals { have_package_configuration('libiconv') } - if config && try_link_iconv('pkg-config libiconv') { have_package_configuration('libiconv') } + config = preserving_globals { have_package_configuration("libiconv") } + if config && try_link_iconv("pkg-config libiconv") { have_package_configuration("libiconv") } cflags, ldflags, libs = config return [ - '--with-iconv=yes', + "--with-iconv=yes", "CPPFLAGS=#{cflags}", "LDFLAGS=#{ldflags}", "LIBS=#{libs}", @@ -396,23 +415,26 @@ def iconv_configure_flags abort_could_not_find_library("libiconv") end -def process_recipe(name, version, static_p, cross_p) - require 'rubygems' - gem('mini_portile2', REQUIRED_MINI_PORTILE_VERSION) - require 'mini_portile2' +def process_recipe(name, version, static_p, cross_p, cacheable_p = true) + require "rubygems" + gem("mini_portile2", REQUIRED_MINI_PORTILE_VERSION) # gemspec is not respected at install time + require "mini_portile2" message("Using mini_portile version #{MiniPortile::VERSION}\n") - if name != "libxml2" && name != "libxslt" + unless ["libxml2", "libxslt"].include?(name) OTHER_LIBRARY_VERSIONS[name] = version end MiniPortile.new(name, version).tap do |recipe| - recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") - # Prefer host_alias over host in order to use i586-mingw32msvc as - # correct compiler prefix for cross 
build, but use host if not set. + def recipe.port_path + "#{@target}/#{RUBY_PLATFORM}/#{@name}/#{@version}" + end + + recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p + # Prefer host_alias over host in order to use the correct compiler prefix for cross build, but + # use host if not set. recipe.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"] - recipe.patch_files = Dir[File.join(PACKAGE_ROOT_DIR, "patches", name, "*.patch")].sort - recipe.configure_options << "--libdir=#{File.join(recipe.path, 'lib')}" + recipe.configure_options << "--libdir=#{File.join(recipe.path, "lib")}" yield recipe @@ -456,10 +478,10 @@ def process_recipe(name, version, static_p, cross_p) ] end - if RbConfig::CONFIG['target_cpu'] == 'universal' - %w[CFLAGS LDFLAGS].each do |key| - unless env[key].include?('-arch') - env[key] = concat_flags(env[key], RbConfig::CONFIG['ARCH_FLAG']) + if RbConfig::CONFIG["target_cpu"] == "universal" + ["CFLAGS", "LDFLAGS"].each do |key| + unless env[key].include?("-arch") + env[key] = concat_flags(env[key], RbConfig::CONFIG["ARCH_FLAG"]) end end end @@ -468,8 +490,8 @@ def process_recipe(name, version, static_p, cross_p) "#{key}=#{value.strip}" end - checkpoint = "#{recipe.target}/#{recipe.name}-#{recipe.version}-#{recipe.host}.installed" - if File.exist?(checkpoint) + checkpoint = "#{recipe.target}/#{recipe.name}-#{recipe.version}-#{RUBY_PLATFORM}.installed" + if File.exist?(checkpoint) && !recipe.source_directory message("Building Nokogiri with a packaged version of #{name}-#{version}.\n") else message(<<~EOM) @@ -482,11 +504,11 @@ def process_recipe(name, version, static_p, cross_p) message("The following patches are being applied:\n") recipe.patch_files.each do |patch| - message(" - %s\n" % File.basename(patch)) + message(format(" - %s\n", File.basename(patch))) end end - message(<<~EOM) + message(<<~EOM) if name != "libgumbo" The Nokogiri maintainers intend to provide timely security 
updates, but if this is a concern for you and want to use your OS/distro system library @@ -497,15 +519,14 @@ def process_recipe(name, version, static_p, cross_p) EOM - message(<<~EOM) if name == 'libxml2' - Note, however, that nokogiri cannot guarantee compatiblity with every + message(<<~EOM) if name == "libxml2" + Note, however, that nokogiri cannot guarantee compatibility with every version of libxml2 that may be provided by OS/package vendors. EOM - chdir_for_build do - recipe.cook - end + pp(recipe.files) + chdir_for_build { recipe.cook } FileUtils.touch(checkpoint) end recipe.activate @@ -516,7 +537,7 @@ def copy_packaged_libraries_headers(to_path:, from_recipes:) FileUtils.rm_rf(to_path, secure: true) FileUtils.mkdir(to_path) from_recipes.each do |recipe| - FileUtils.cp_r(Dir[File.join(recipe.path, 'include/*')], to_path) + FileUtils.cp_r(Dir[File.join(recipe.path, "include/*")], to_path) end end @@ -530,22 +551,22 @@ def do_clean pwd = Pathname(Dir.pwd) # Skip if this is a development work tree - unless (root + '.git').exist? + unless (root + ".git").exist? message("Cleaning files only used during build.\n") # (root + 'tmp') cannot be removed at this stage because # nokogiri.so is yet to be copied to lib. # clean the ports build directory - Pathname.glob(pwd.join('tmp', '*', 'ports')) do |dir| + Pathname.glob(pwd.join("tmp", "*", "ports")) do |dir| FileUtils.rm_rf(dir, verbose: true) end if config_static? # ports installation can be safely removed if statically linked. - FileUtils.rm_rf(root + 'ports', verbose: true) + FileUtils.rm_rf(root + "ports", verbose: true) else - FileUtils.rm_rf(root + 'ports' + 'archives', verbose: true) + FileUtils.rm_rf(root + "ports" + "archives", verbose: true) end end @@ -555,25 +576,25 @@ def do_clean # # main # -do_help if arg_config('--help') -do_clean if arg_config('--clean') +do_help if arg_config("--help") +do_clean if arg_config("--clean") if openbsd? && !config_system_libraries? 
- if %x(#{ENV['CC'] || '/usr/bin/cc'} -v 2>&1) !~ /clang/ - (ENV['CC'] ||= find_executable('egcc')) || + if %x(#{ENV["CC"] || "/usr/bin/cc"} -v 2>&1) !~ /clang/ + (ENV["CC"] ||= find_executable("egcc")) || abort("Please install gcc 4.9+ from ports using `pkg_add -v gcc`") end append_cppflags "-I/usr/local/include" end -if ENV['CC'] - RbConfig::CONFIG['CC'] = RbConfig::MAKEFILE_CONFIG['CC'] = ENV['CC'] +if ENV["CC"] + RbConfig::CONFIG["CC"] = RbConfig::MAKEFILE_CONFIG["CC"] = ENV["CC"] end # use same c compiler for libxml and libxslt -ENV['CC'] = RbConfig::CONFIG['CC'] +ENV["CC"] = RbConfig::CONFIG["CC"] -if arg_config('--prevent-strip') +if arg_config("--prevent-strip") old_cflags = $CFLAGS.split.join(" ") old_ldflags = $LDFLAGS.split.join(" ") old_dldflags = $DLDFLAGS.split.join(" ") @@ -591,6 +612,10 @@ def do_clean append_ldflags(ENV["LDFLAGS"].split) unless ENV["LDFLAGS"].nil? $LIBS = concat_flags($LIBS, ENV["LIBS"]) +# nokogumbo code uses C90/C99 features, let's make sure older compilers won't give +# errors/warnings. see #2302 +append_cflags(["-std=c99", "-Wno-declaration-after-statement"]) + # always include debugging information append_cflags("-g") @@ -619,13 +644,13 @@ def do_clean if config_system_libraries? 
message "Building nokogiri using system libraries.\n" ensure_package_configuration(opt: "zlib", pc: "zlib", lib: "z", - headers: "zlib.h", func: "gzdopen") + headers: "zlib.h", func: "gzdopen") ensure_package_configuration(opt: "xml2", pc: "libxml-2.0", lib: "xml2", - headers: "libxml/parser.h", func: "xmlParseDoc") + headers: "libxml/parser.h", func: "xmlParseDoc") ensure_package_configuration(opt: "xslt", pc: "libxslt", lib: "xslt", - headers: "libxslt/xslt.h", func: "xsltParseStylesheetDoc") + headers: "libxslt/xslt.h", func: "xsltParseStylesheetDoc") ensure_package_configuration(opt: "exslt", pc: "libexslt", lib: "exslt", - headers: "libexslt/exslt.h", func: "exsltFuncRegister") + headers: "libexslt/exslt.h", func: "exsltFuncRegister") have_libxml_headers?(REQUIRED_LIBXML_VERSION) || abort("ERROR: libxml2 version #{REQUIRED_LIBXML_VERSION} or later is required!") @@ -636,20 +661,20 @@ def do_clean message "Building nokogiri using packaged libraries.\n" static_p = config_static? - message "Static linking is #{static_p ? 'enabled' : 'disabled'}.\n" + message "Static linking is #{static_p ? "enabled" : "disabled"}.\n" cross_build_p = config_cross_build? - message "Cross build is #{cross_build_p ? 'enabled' : 'disabled'}.\n" + message "Cross build is #{cross_build_p ? "enabled" : "disabled"}.\n" - require 'yaml' + require "yaml" dependencies = YAML.load_file(File.join(PACKAGE_ROOT_DIR, "dependencies.yml")) - dir_config('zlib') + dir_config("zlib") if cross_build_p || windows? zlib_recipe = process_recipe("zlib", dependencies["zlib"]["version"], static_p, cross_build_p) do |recipe| recipe.files = [{ - url: "http://zlib.net/fossils/#{recipe.name}-#{recipe.version}.tar.gz", + url: "https://zlib.net/fossils/#{recipe.name}-#{recipe.version}.tar.gz", sha256: dependencies["zlib"]["sha256"], }] if windows? 
@@ -658,8 +683,8 @@ class << recipe def configure Dir.chdir(work_path) do - mk = File.read('win32/Makefile.gcc') - File.open('win32/Makefile.gcc', 'wb') do |f| + mk = File.read("win32/Makefile.gcc") + File.open("win32/Makefile.gcc", "wb") do |f| f.puts "BINARY_PATH = #{path}/bin" f.puts "LIBRARY_PATH = #{path}/lib" f.puts "INCLUDE_PATH = #{path}/include" @@ -671,7 +696,7 @@ def configure def configured? Dir.chdir(work_path) do - !!(File.read('win32/Makefile.gcc') =~ /^BINARY_PATH/) + !!(File.read("win32/Makefile.gcc") =~ /^BINARY_PATH/) end end @@ -687,16 +712,16 @@ def install else class << recipe def configure - cflags = concat_flags(ENV["CFLAGS"], "-fPIC", "-g") - execute("configure", -["env", "CHOST=#{host}", "CFLAGS=#{cflags}", "./configure", "--static", configure_prefix]) - end - - def compile - if host =~ /darwin/ - execute("compile", "make AR=#{host}-libtool") - else - super + env = {} + env["CFLAGS"] = concat_flags(ENV["CFLAGS"], "-fPIC", "-g") + env["CHOST"] = host + execute("configure", ["./configure", "--static", configure_prefix], { env: env }) + if darwin? + # needed as of zlib 1.2.13 + Dir.chdir(work_path) do + makefile = File.read("Makefile").gsub(/^AR=.*$/, "AR=#{host}-libtool") + File.open("Makefile", "w") { |m| m.write(makefile) } + end end end end @@ -705,15 +730,19 @@ def compile unless nix? 
libiconv_recipe = process_recipe("libiconv", dependencies["libiconv"]["version"], static_p, -cross_build_p) do |recipe| + cross_build_p) do |recipe| recipe.files = [{ - url: "http://ftp.gnu.org/pub/gnu/libiconv/#{recipe.name}-#{recipe.version}.tar.gz", + url: "https://ftp.gnu.org/pub/gnu/libiconv/#{recipe.name}-#{recipe.version}.tar.gz", sha256: dependencies["libiconv"]["sha256"], }] + # The libiconv configure script doesn't accept "arm64" host string but "aarch64" + recipe.host = recipe.host.gsub("arm64-apple-darwin", "aarch64-apple-darwin") + cflags = concat_flags(ENV["CFLAGS"], "-O2", "-U_FORTIFY_SOURCE", "-g") recipe.configure_options += [ + "--disable-dependency-tracking", "CPPFLAGS=-Wall", "CFLAGS=#{cflags}", "CXXFLAGS=#{cflags}", @@ -721,7 +750,7 @@ def compile ] end end - elsif darwin? && !have_header('iconv.h') + elsif darwin? && !have_header("iconv.h") abort(<<~EOM.chomp) ----- The file "iconv.h" is missing in your build environment, @@ -733,25 +762,40 @@ def compile Tools" to open the developer site, download the installer for your OS version and run it. ----- - EOM + EOM end - unless windows? 
- preserving_globals { local_have_library('z', 'gzdopen', 'zlib.h') } || - abort('zlib is missing; necessary for building libxml2') + if zlib_recipe + append_cppflags("-I#{zlib_recipe.path}/include") + $LIBPATH = ["#{zlib_recipe.path}/lib"] | $LIBPATH + ensure_package_configuration(opt: "zlib", pc: "zlib", lib: "z", + headers: "zlib.h", func: "gzdopen") + end + + if libiconv_recipe + append_cppflags("-I#{libiconv_recipe.path}/include") + $LIBPATH = ["#{libiconv_recipe.path}/lib"] | $LIBPATH + ensure_package_configuration(opt: "iconv", pc: "iconv", lib: "iconv", + headers: "iconv.h", func: "iconv_open") end libxml2_recipe = process_recipe("libxml2", dependencies["libxml2"]["version"], static_p, cross_build_p) do |recipe| - recipe.files = [{ - url: "http://xmlsoft.org/sources/#{recipe.name}-#{recipe.version}.tar.gz", - sha256: dependencies["libxml2"]["sha256"], - }] + source_dir = arg_config("--with-xml2-source-dir") + if source_dir + recipe.source_directory = source_dir + else + minor_version = Gem::Version.new(recipe.version).segments.take(2).join(".") + recipe.files = [{ + url: "#{gnome_source}/sources/libxml2/#{minor_version}/#{recipe.name}-#{recipe.version}.tar.xz", + sha256: dependencies["libxml2"]["sha256"], + }] + recipe.patch_files = Dir[File.join(PACKAGE_ROOT_DIR, "patches", "libxml2", "*.patch")].sort + end cflags = concat_flags(ENV["CFLAGS"], "-O2", "-U_FORTIFY_SOURCE", "-g") if zlib_recipe recipe.configure_options << "--with-zlib=#{zlib_recipe.path}" - cflags = concat_flags(cflags, "-I#{zlib_recipe.path}/include") end if libiconv_recipe @@ -768,6 +812,12 @@ def compile cflags = concat_flags(cflags, "-ULIBXML_STATIC", "-DIN_LIBXML") end + recipe.configure_options << if source_dir + "--config-cache" + else + "--disable-dependency-tracking" + end + recipe.configure_options += [ "--without-python", "--without-readline", @@ -779,10 +829,17 @@ def compile end libxslt_recipe = process_recipe("libxslt", dependencies["libxslt"]["version"], static_p, 
cross_build_p) do |recipe| - recipe.files = [{ - url: "http://xmlsoft.org/sources/#{recipe.name}-#{recipe.version}.tar.gz", - sha256: dependencies["libxslt"]["sha256"], - }] + source_dir = arg_config("--with-xslt-source-dir") + if source_dir + recipe.source_directory = source_dir + else + minor_version = Gem::Version.new(recipe.version).segments.take(2).join(".") + recipe.files = [{ + url: "#{gnome_source}/sources/libxslt/#{minor_version}/#{recipe.name}-#{recipe.version}.tar.xz", + sha256: dependencies["libxslt"]["sha256"], + }] + recipe.patch_files = Dir[File.join(PACKAGE_ROOT_DIR, "patches", "libxslt", "*.patch")].sort + end cflags = concat_flags(ENV["CFLAGS"], "-O2", "-U_FORTIFY_SOURCE", "-g") @@ -790,6 +847,17 @@ def compile recipe.configure_options += ["RANLIB=/usr/bin/ranlib", "AR=/usr/bin/ar"] end + if windows? + cflags = concat_flags(cflags, "-ULIBXSLT_STATIC", "-DIN_LIBXSLT") + cflags = concat_flags(cflags, "-ULIBEXSLT_STATIC", "-DIN_LIBEXSLT") + end + + recipe.configure_options << if source_dir + "--config-cache" + else + "--disable-dependency-tracking" + end + recipe.configure_options += [ "--without-python", "--without-crypto", @@ -802,20 +870,17 @@ def compile append_cppflags("-DNOKOGIRI_PACKAGED_LIBRARIES") append_cppflags("-DNOKOGIRI_PRECOMPILED_LIBRARIES") if cross_build_p - $LIBPATH = ["#{zlib_recipe.path}/lib"] | $LIBPATH if zlib_recipe - $LIBPATH = ["#{libiconv_recipe.path}/lib"] | $LIBPATH if libiconv_recipe - $libs = $libs.shellsplit.tap do |libs| [libxml2_recipe, libxslt_recipe].each do |recipe| libname = recipe.name[/\Alib(.+)\z/, 1] File.join(recipe.path, "bin", "#{libname}-config").tap do |config| # call config scripts explicit with 'sh' for compat with Windows - $CPPFLAGS = %x(sh #{config} --cflags).strip << ' ' << $CPPFLAGS + $CPPFLAGS = %x(sh #{config} --cflags).strip << " " << $CPPFLAGS %x(sh #{config} --libs).strip.shellsplit.each do |arg| case arg when /\A-L(.+)\z/ # Prioritize ports' directories - $LIBPATH = if 
Regexp.last_match(1).start_with?(PACKAGE_ROOT_DIR + '/') + $LIBPATH = if Regexp.last_match(1).start_with?(PACKAGE_ROOT_DIR + "/") [Regexp.last_match(1)] | $LIBPATH else $LIBPATH | [Regexp.last_match(1)] @@ -823,26 +888,26 @@ def compile when /\A-l./ libs.unshift(arg) else - $LDFLAGS << ' ' << arg.shellescape + $LDFLAGS << " " << arg.shellescape end end end - patches_string = recipe.patch_files.map { |path| File.basename(path) }.join(' ') + patches_string = recipe.patch_files.map { |path| File.basename(path) }.join(" ") append_cppflags(%[-DNOKOGIRI_#{recipe.name.upcase}_PATCHES="\\\"#{patches_string}\\\""]) case libname - when 'xml2' + when "xml2" # xslt-config --libs or pkg-config libxslt --libs does not include # -llzma, so we need to add it manually when linking statically. - if static_p && preserving_globals { local_have_library('lzma') } + if static_p && preserving_globals { local_have_library("lzma") } # Add it at the end; GH #988 - libs << '-llzma' + libs << "-llzma" end - when 'xslt' + when "xslt" # xslt-config does not have a flag to emit options including # -lexslt, so add it manually. 
- libs.unshift('-lexslt') + libs.unshift("-lexslt") end end end.shelljoin @@ -850,10 +915,10 @@ def compile if static_p $libs = $libs.shellsplit.map do |arg| case arg - when '-lxml2' - File.join(libxml2_recipe.path, 'lib', libflag_to_filename(arg)) - when '-lxslt', '-lexslt' - File.join(libxslt_recipe.path, 'lib', libflag_to_filename(arg)) + when "-lxml2" + File.join(libxml2_recipe.path, "lib", libflag_to_filename(arg)) + when "-lxslt", "-lexslt" + File.join(libxslt_recipe.path, "lib", libflag_to_filename(arg)) else arg end @@ -865,14 +930,66 @@ def compile ensure_func("exsltFuncRegister", "libexslt/exslt.h") end -have_func('xmlHasFeature') || abort("xmlHasFeature() is missing.") # introduced in libxml 2.6.21 -have_func('xmlFirstElementChild') # introduced in libxml 2.7.3 -have_func('xmlRelaxNGSetParserStructuredErrors') # introduced in libxml 2.6.24 -have_func('xmlRelaxNGSetValidStructuredErrors') # introduced in libxml 2.6.21 -have_func('xmlSchemaSetValidStructuredErrors') # introduced in libxml 2.6.23 -have_func('xmlSchemaSetParserStructuredErrors') # introduced in libxml 2.6.23 +libgumbo_recipe = process_recipe("libgumbo", "1.0.0-nokogiri", static_p, cross_build_p, false) do |recipe| + recipe.configure_options = [] + + class << recipe + def downloaded? + true + end + + def extract + target = File.join(tmp_path, "gumbo-parser") + output("Copying gumbo-parser files into #{target}...") + FileUtils.mkdir_p(target) + FileUtils.cp(Dir.glob(File.join(PACKAGE_ROOT_DIR, "gumbo-parser/src/*")), target) + end + + def configured? + true + end + + def install + lib_dir = File.join(port_path, "lib") + inc_dir = File.join(port_path, "include") + FileUtils.mkdir_p([lib_dir, inc_dir]) + FileUtils.cp(File.join(work_path, "libgumbo.a"), lib_dir) + FileUtils.cp(Dir.glob(File.join(work_path, "*.h")), inc_dir) + end + + def compile + cflags = concat_flags(ENV["CFLAGS"], "-fPIC", "-g") + + env = { "CC" => gcc_cmd, "CFLAGS" => cflags } + if config_cross_build? 
+ if /darwin/.match?(host) + env["AR"] = "#{host}-libtool" + env["ARFLAGS"] = "-o" + else + env["AR"] = "#{host}-ar" + end + env["RANLIB"] = "#{host}-ranlib" + end + + execute("compile", make_cmd, { env: env }) + end + end +end +append_cppflags("-I#{File.join(libgumbo_recipe.path, "include")}") +$libs = $libs + " " + File.join(libgumbo_recipe.path, "lib", "libgumbo.a") +$LIBPATH = $LIBPATH | [File.join(libgumbo_recipe.path, "lib")] +ensure_func("gumbo_parse_with_options", "gumbo.h") + +have_func("xmlHasFeature") || abort("xmlHasFeature() is missing.") # introduced in libxml 2.6.21 +have_func("xmlFirstElementChild") # introduced in libxml 2.7.3 +have_func("xmlRelaxNGSetParserStructuredErrors") # introduced in libxml 2.6.24 +have_func("xmlRelaxNGSetValidStructuredErrors") # introduced in libxml 2.6.21 +have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23 +have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23 +have_func("rb_gc_location") # introduced in Ruby 2.7 +have_func("rb_category_warning") # introduced in Ruby 3.0 -have_func('vasprintf') +have_func("vasprintf") other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",") append_cppflags(%[-DNOKOGIRI_OTHER_LIBRARY_VERSIONS="\\\"#{other_library_versions_string}\\\""]) @@ -882,25 +999,25 @@ def compile # When precompiling native gems, copy packaged libraries' headers to ext/nokogiri/include # These are packaged up by the cross-compiling callback in the ExtensionTask copy_packaged_libraries_headers(to_path: File.join(PACKAGE_ROOT_DIR, "ext/nokogiri/include"), - from_recipes: [libxml2_recipe, libxslt_recipe]) + from_recipes: [libxml2_recipe, libxslt_recipe]) else # When compiling during installation, install packaged libraries' header files into ext/nokogiri/include copy_packaged_libraries_headers(to_path: "include", - from_recipes: [libxml2_recipe, libxslt_recipe]) + from_recipes: [libxml2_recipe, libxslt_recipe]) $INSTALLFILES << 
["include/**/*.h", "$(rubylibdir)"] end end -create_makefile('nokogiri/nokogiri') +create_makefile("nokogiri/nokogiri") if config_clean? # Do not clean if run in a development work tree. - File.open('Makefile', 'at') do |mk| + File.open("Makefile", "at") do |mk| mk.print(<<~EOF) all: clean-ports clean-ports: $(DLLIB) - \t-$(Q)$(RUBY) $(srcdir)/extconf.rb --clean --#{static_p ? 'enable' : 'disable'}-static + \t-$(Q)$(RUBY) $(srcdir)/extconf.rb --clean --#{static_p ? "enable" : "disable"}-static EOF end end diff --git a/ext/nokogiri/gumbo.c b/ext/nokogiri/gumbo.c new file mode 100644 index 0000000000..b732fca43f --- /dev/null +++ b/ext/nokogiri/gumbo.c @@ -0,0 +1,584 @@ +// +// Copyright 2013-2021 Sam Ruby, Stephen Checkoway +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// +// nokogumbo.c defines the following: +// +// class Nokogumbo +// def parse(utf8_string) # returns Nokogiri::HTML5::Document +// end +// +// Processing starts by calling gumbo_parse_with_options. The resulting document tree +// is then walked, a parallel libxml2 tree is constructed, and the final document is +// then wrapped using Nokogiri_wrap_xml_document. This approach reduces memory and CPU +// requirements as Ruby objects are only built when necessary. 
+// + +#include + +#include "gumbo.h" + +VALUE cNokogiriHtml5Document; + +// Interned symbols +static ID internal_subset; +static ID parent; + +/* Backwards compatibility to Ruby 2.1.0 */ +#if RUBY_API_VERSION_CODE < 20200 +#define ONIG_ESCAPE_UCHAR_COLLISION 1 +#include + +static VALUE +rb_utf8_str_new(const char *str, long length) +{ + return rb_enc_str_new(str, length, rb_utf8_encoding()); +} + +static VALUE +rb_utf8_str_new_cstr(const char *str) +{ + return rb_enc_str_new_cstr(str, rb_utf8_encoding()); +} + +static VALUE +rb_utf8_str_new_static(const char *str, long length) +{ + return rb_enc_str_new(str, length, rb_utf8_encoding()); +} +#endif + +#include +#include +#include + +// URI = system id +// external id = public id +static xmlDocPtr +new_html_doc(const char *dtd_name, const char *system, const char *public) +{ + // These two libxml2 functions take the public and system ids in + // opposite orders. + htmlDocPtr doc = htmlNewDocNoDtD(/* URI */ NULL, /* ExternalID */NULL); + assert(doc); + if (dtd_name) { + xmlCreateIntSubset(doc, (const xmlChar *)dtd_name, (const xmlChar *)public, (const xmlChar *)system); + } + return doc; +} + +static xmlNodePtr +get_parent(xmlNodePtr node) +{ + return node->parent; +} + +static GumboOutput * +perform_parse(const GumboOptions *options, VALUE input) +{ + assert(RTEST(input)); + Check_Type(input, T_STRING); + GumboOutput *output = gumbo_parse_with_options( + options, + RSTRING_PTR(input), + RSTRING_LEN(input) + ); + + const char *status_string = gumbo_status_to_string(output->status); + switch (output->status) { + case GUMBO_STATUS_OK: + break; + case GUMBO_STATUS_TOO_MANY_ATTRIBUTES: + case GUMBO_STATUS_TREE_TOO_DEEP: + gumbo_destroy_output(output); + rb_raise(rb_eArgError, "%s", status_string); + case GUMBO_STATUS_OUT_OF_MEMORY: + gumbo_destroy_output(output); + rb_raise(rb_eNoMemError, "%s", status_string); + } + return output; +} + +static xmlNsPtr +lookup_or_add_ns( + xmlDocPtr doc, + xmlNodePtr root, + const char 
*href, + const char *prefix +) +{ + xmlNsPtr ns = xmlSearchNs(doc, root, (const xmlChar *)prefix); + if (ns) { + return ns; + } + return xmlNewNs(root, (const xmlChar *)href, (const xmlChar *)prefix); +} + +static void +set_line(xmlNodePtr node, size_t line) +{ + // libxml2 uses 65535 to mean look elsewhere for the line number on some + // nodes. + if (line < 65535) { + node->line = (unsigned short)line; + } +} + +// Construct an XML tree rooted at xml_output_node from the Gumbo tree rooted +// at gumbo_node. +static void +build_tree( + xmlDocPtr doc, + xmlNodePtr xml_output_node, + const GumboNode *gumbo_node +) +{ + xmlNodePtr xml_root = NULL; + xmlNodePtr xml_node = xml_output_node; + size_t child_index = 0; + + while (true) { + assert(gumbo_node != NULL); + const GumboVector *children = gumbo_node->type == GUMBO_NODE_DOCUMENT ? + &gumbo_node->v.document.children : &gumbo_node->v.element.children; + if (child_index >= children->length) { + // Move up the tree and to the next child. + if (xml_node == xml_output_node) { + // We've built as much of the tree as we can. + return; + } + child_index = gumbo_node->index_within_parent + 1; + gumbo_node = gumbo_node->parent; + xml_node = get_parent(xml_node); + // Children of fragments don't share the same root, so reset it and + // it'll be set below. In the non-fragment case, this will only happen + // after the html element has been finished at which point there are no + // further elements. + if (xml_node == xml_output_node) { + xml_root = NULL; + } + continue; + } + const GumboNode *gumbo_child = children->data[child_index++]; + xmlNodePtr xml_child; + + switch (gumbo_child->type) { + case GUMBO_NODE_DOCUMENT: + abort(); // Bug in Gumbo. 
+ + case GUMBO_NODE_TEXT: + case GUMBO_NODE_WHITESPACE: + xml_child = xmlNewDocText(doc, (const xmlChar *)gumbo_child->v.text.text); + set_line(xml_child, gumbo_child->v.text.start_pos.line); + xmlAddChild(xml_node, xml_child); + break; + + case GUMBO_NODE_CDATA: + xml_child = xmlNewCDataBlock(doc, (const xmlChar *)gumbo_child->v.text.text, + (int) strlen(gumbo_child->v.text.text)); + set_line(xml_child, gumbo_child->v.text.start_pos.line); + xmlAddChild(xml_node, xml_child); + break; + + case GUMBO_NODE_COMMENT: + xml_child = xmlNewDocComment(doc, (const xmlChar *)gumbo_child->v.text.text); + set_line(xml_child, gumbo_child->v.text.start_pos.line); + xmlAddChild(xml_node, xml_child); + break; + + case GUMBO_NODE_TEMPLATE: + // XXX: Should create a template element and a new DocumentFragment + case GUMBO_NODE_ELEMENT: { + xml_child = xmlNewDocNode(doc, NULL, (const xmlChar *)gumbo_child->v.element.name, NULL); + set_line(xml_child, gumbo_child->v.element.start_pos.line); + if (xml_root == NULL) { + xml_root = xml_child; + } + xmlNsPtr ns = NULL; + switch (gumbo_child->v.element.tag_namespace) { + case GUMBO_NAMESPACE_HTML: + break; + case GUMBO_NAMESPACE_SVG: + ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/2000/svg", "svg"); + break; + case GUMBO_NAMESPACE_MATHML: + ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/1998/Math/MathML", "math"); + break; + } + if (ns != NULL) { + xmlSetNs(xml_child, ns); + } + xmlAddChild(xml_node, xml_child); + + // Add the attributes. 
+ const GumboVector *attrs = &gumbo_child->v.element.attributes; + for (size_t i = 0; i < attrs->length; i++) { + const GumboAttribute *attr = attrs->data[i]; + + switch (attr->attr_namespace) { + case GUMBO_ATTR_NAMESPACE_XLINK: + ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/1999/xlink", "xlink"); + break; + + case GUMBO_ATTR_NAMESPACE_XML: + ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/XML/1998/namespace", "xml"); + break; + + case GUMBO_ATTR_NAMESPACE_XMLNS: + ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/2000/xmlns/", "xmlns"); + break; + + default: + ns = NULL; + } + xmlNewNsProp(xml_child, ns, (const xmlChar *)attr->name, (const xmlChar *)attr->value); + } + + // Add children for this element. + child_index = 0; + gumbo_node = gumbo_child; + xml_node = xml_child; + } + } + } +} + +static void +add_errors(const GumboOutput *output, VALUE rdoc, VALUE input, VALUE url) +{ + const char *input_str = RSTRING_PTR(input); + size_t input_len = RSTRING_LEN(input); + + // Add parse errors to rdoc. + if (output->errors.length) { + const GumboVector *errors = &output->errors; + VALUE rerrors = rb_ary_new2(errors->length); + + for (size_t i = 0; i < errors->length; i++) { + GumboError *err = errors->data[i]; + GumboSourcePosition position = gumbo_error_position(err); + char *msg; + size_t size = gumbo_caret_diagnostic_to_string(err, input_str, input_len, &msg); + VALUE err_str = rb_utf8_str_new(msg, size); + free(msg); + VALUE syntax_error = rb_class_new_instance(1, &err_str, cNokogiriXmlSyntaxError); + const char *error_code = gumbo_error_code(err); + VALUE str1 = error_code ? 
rb_utf8_str_new_static(error_code, strlen(error_code)) : Qnil; + rb_iv_set(syntax_error, "@domain", INT2NUM(1)); // XML_FROM_PARSER + rb_iv_set(syntax_error, "@code", INT2NUM(1)); // XML_ERR_INTERNAL_ERROR + rb_iv_set(syntax_error, "@level", INT2NUM(2)); // XML_ERR_ERROR + rb_iv_set(syntax_error, "@file", url); + rb_iv_set(syntax_error, "@line", INT2NUM(position.line)); + rb_iv_set(syntax_error, "@str1", str1); + rb_iv_set(syntax_error, "@str2", Qnil); + rb_iv_set(syntax_error, "@str3", Qnil); + rb_iv_set(syntax_error, "@int1", INT2NUM(0)); + rb_iv_set(syntax_error, "@column", INT2NUM(position.column)); + rb_ary_push(rerrors, syntax_error); + } + rb_iv_set(rdoc, "@errors", rerrors); + } +} + +typedef struct { + GumboOutput *output; + VALUE input; + VALUE url_or_frag; + xmlDocPtr doc; +} ParseArgs; + +static VALUE +parse_cleanup(VALUE parse_args) +{ + ParseArgs *args = (ParseArgs *)parse_args; + gumbo_destroy_output(args->output); + // Make sure garbage collection doesn't mark the objects as being live based + // on references from the ParseArgs. This may be unnecessary. 
+ args->input = Qnil; + args->url_or_frag = Qnil; + if (args->doc != NULL) { + xmlFreeDoc(args->doc); + } + return Qnil; +} + +static VALUE parse_continue(VALUE parse_args); + +/* + * @!visibility protected + */ +static VALUE +parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth) +{ + GumboOptions options = kGumboDefaultOptions; + options.max_attributes = NUM2INT(max_attributes); + options.max_errors = NUM2INT(max_errors); + options.max_tree_depth = NUM2INT(max_depth); + + GumboOutput *output = perform_parse(&options, input); + ParseArgs args = { + .output = output, + .input = input, + .url_or_frag = url, + .doc = NULL, + }; + + return rb_ensure(parse_continue, (VALUE)(&args), parse_cleanup, (VALUE)(&args)); +} + +static VALUE +parse_continue(VALUE parse_args) +{ + ParseArgs *args = (ParseArgs *)parse_args; + GumboOutput *output = args->output; + xmlDocPtr doc; + if (output->document->v.document.has_doctype) { + const char *name = output->document->v.document.name; + const char *public = output->document->v.document.public_identifier; + const char *system = output->document->v.document.system_identifier; + public = public[0] ? public : NULL; + system = system[0] ? system : NULL; + doc = new_html_doc(name, system, public); + } else { + doc = new_html_doc(NULL, NULL, NULL); + } + args->doc = doc; // Make sure doc gets cleaned up if an error is thrown. + build_tree(doc, (xmlNodePtr)doc, output->document); + VALUE rdoc = Nokogiri_wrap_xml_document(cNokogiriHtml5Document, doc); + args->doc = NULL; // The Ruby runtime now owns doc so don't delete it. 
+ add_errors(output, rdoc, args->input, args->url_or_frag); + return rdoc; +} + +static int +lookup_namespace(VALUE node, bool require_known_ns) +{ + ID namespace, href; + CONST_ID(namespace, "namespace"); + CONST_ID(href, "href"); + VALUE ns = rb_funcall(node, namespace, 0); + + if (NIL_P(ns)) { + return GUMBO_NAMESPACE_HTML; + } + ns = rb_funcall(ns, href, 0); + assert(RTEST(ns)); + Check_Type(ns, T_STRING); + + const char *href_ptr = RSTRING_PTR(ns); + size_t href_len = RSTRING_LEN(ns); +#define NAMESPACE_P(uri) (href_len == sizeof uri - 1 && !memcmp(href_ptr, uri, href_len)) + if (NAMESPACE_P("http://www.w3.org/1999/xhtml")) { + return GUMBO_NAMESPACE_HTML; + } + if (NAMESPACE_P("http://www.w3.org/1998/Math/MathML")) { + return GUMBO_NAMESPACE_MATHML; + } + if (NAMESPACE_P("http://www.w3.org/2000/svg")) { + return GUMBO_NAMESPACE_SVG; + } +#undef NAMESPACE_P + if (require_known_ns) { + rb_raise(rb_eArgError, "Unexpected namespace URI \"%*s\"", (int)href_len, href_ptr); + } + return -1; +} + +static xmlNodePtr +extract_xml_node(VALUE node) +{ + xmlNodePtr xml_node; + Noko_Node_Get_Struct(node, xmlNode, xml_node); + return xml_node; +} + +static VALUE fragment_continue(VALUE parse_args); + +/* + * @!visibility protected + */ +static VALUE +fragment( + VALUE self, + VALUE doc_fragment, + VALUE tags, + VALUE ctx, + VALUE max_attributes, + VALUE max_errors, + VALUE max_depth +) +{ + ID name = rb_intern_const("name"); + const char *ctx_tag; + GumboNamespaceEnum ctx_ns; + GumboQuirksModeEnum quirks_mode; + bool form = false; + const char *encoding = NULL; + + if (NIL_P(ctx)) { + ctx_tag = "body"; + ctx_ns = GUMBO_NAMESPACE_HTML; + } else if (TYPE(ctx) == T_STRING) { + ctx_tag = StringValueCStr(ctx); + ctx_ns = GUMBO_NAMESPACE_HTML; + size_t len = RSTRING_LEN(ctx); + const char *colon = memchr(ctx_tag, ':', len); + if (colon) { + switch (colon - ctx_tag) { + case 3: + if (st_strncasecmp(ctx_tag, "svg", 3) != 0) { + goto error; + } + ctx_ns = GUMBO_NAMESPACE_SVG; + 
break; + case 4: + if (st_strncasecmp(ctx_tag, "html", 4) == 0) { + ctx_ns = GUMBO_NAMESPACE_HTML; + } else if (st_strncasecmp(ctx_tag, "math", 4) == 0) { + ctx_ns = GUMBO_NAMESPACE_MATHML; + } else { + goto error; + } + break; + default: +error: + rb_raise(rb_eArgError, "Invalid context namespace '%*s'", (int)(colon - ctx_tag), ctx_tag); + } + ctx_tag = colon + 1; + } else { + // For convenience, put 'svg' and 'math' in their namespaces. + if (len == 3 && st_strncasecmp(ctx_tag, "svg", 3) == 0) { + ctx_ns = GUMBO_NAMESPACE_SVG; + } else if (len == 4 && st_strncasecmp(ctx_tag, "math", 4) == 0) { + ctx_ns = GUMBO_NAMESPACE_MATHML; + } + } + + // Check if it's a form. + form = ctx_ns == GUMBO_NAMESPACE_HTML && st_strcasecmp(ctx_tag, "form") == 0; + } else { + ID element_ = rb_intern_const("element?"); + + // Context fragment name. + VALUE tag_name = rb_funcall(ctx, name, 0); + assert(RTEST(tag_name)); + Check_Type(tag_name, T_STRING); + ctx_tag = StringValueCStr(tag_name); + + // Context fragment namespace. + ctx_ns = lookup_namespace(ctx, true); + + // Check for a form ancestor, including self. + for (VALUE node = ctx; + !NIL_P(node); + node = rb_respond_to(node, parent) ? rb_funcall(node, parent, 0) : Qnil) { + if (!RTEST(rb_funcall(node, element_, 0))) { + continue; + } + VALUE element_name = rb_funcall(node, name, 0); + if (RSTRING_LEN(element_name) == 4 + && !st_strcasecmp(RSTRING_PTR(element_name), "form") + && lookup_namespace(node, false) == GUMBO_NAMESPACE_HTML) { + form = true; + break; + } + } + + // Encoding. + if (RSTRING_LEN(tag_name) == 14 + && !st_strcasecmp(ctx_tag, "annotation-xml")) { + VALUE enc = rb_funcall(ctx, rb_intern_const("[]"), + rb_utf8_str_new_static("encoding", 8)); + if (RTEST(enc)) { + Check_Type(enc, T_STRING); + encoding = StringValueCStr(enc); + } + } + } + + // Quirks mode. 
+ VALUE doc = rb_funcall(doc_fragment, rb_intern_const("document"), 0); + VALUE dtd = rb_funcall(doc, internal_subset, 0); + if (NIL_P(dtd)) { + quirks_mode = GUMBO_DOCTYPE_NO_QUIRKS; + } else { + VALUE dtd_name = rb_funcall(dtd, name, 0); + VALUE pubid = rb_funcall(dtd, rb_intern_const("external_id"), 0); + VALUE sysid = rb_funcall(dtd, rb_intern_const("system_id"), 0); + quirks_mode = gumbo_compute_quirks_mode( + NIL_P(dtd_name) ? NULL : StringValueCStr(dtd_name), + NIL_P(pubid) ? NULL : StringValueCStr(pubid), + NIL_P(sysid) ? NULL : StringValueCStr(sysid) + ); + } + + // Perform a fragment parse. + int depth = NUM2INT(max_depth); + GumboOptions options = kGumboDefaultOptions; + options.max_attributes = NUM2INT(max_attributes); + options.max_errors = NUM2INT(max_errors); + // Add one to account for the HTML element. + options.max_tree_depth = depth < 0 ? -1 : (depth + 1); + options.fragment_context = ctx_tag; + options.fragment_namespace = ctx_ns; + options.fragment_encoding = encoding; + options.quirks_mode = quirks_mode; + options.fragment_context_has_form_ancestor = form; + + GumboOutput *output = perform_parse(&options, tags); + ParseArgs args = { + .output = output, + .input = tags, + .url_or_frag = doc_fragment, + .doc = (xmlDocPtr)extract_xml_node(doc), + }; + rb_ensure(fragment_continue, (VALUE)(&args), parse_cleanup, (VALUE)(&args)); + return Qnil; +} + +static VALUE +fragment_continue(VALUE parse_args) +{ + ParseArgs *args = (ParseArgs *)parse_args; + GumboOutput *output = args->output; + VALUE doc_fragment = args->url_or_frag; + xmlDocPtr xml_doc = args->doc; + + args->doc = NULL; // The Ruby runtime owns doc so make sure we don't delete it. + xmlNodePtr xml_frag = extract_xml_node(doc_fragment); + build_tree(xml_doc, xml_frag, output->root); + add_errors(output, doc_fragment, args->input, rb_utf8_str_new_static("#fragment", 9)); + return Qnil; +} + +// Initialize the Nokogumbo class and fetch constants we will use later. 
+void +noko_init_gumbo() +{ + // Class constants. + cNokogiriHtml5Document = rb_define_class_under(mNokogiriHtml5, "Document", cNokogiriHtml4Document); + rb_gc_register_mark_object(cNokogiriHtml5Document); + + // Interned symbols. + internal_subset = rb_intern_const("internal_subset"); + parent = rb_intern_const("parent"); + + // Define Nokogumbo module with parse and fragment methods. + rb_define_singleton_method(mNokogiriGumbo, "parse", parse, 5); + rb_define_singleton_method(mNokogiriGumbo, "fragment", fragment, 6); +} + +// vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab: diff --git a/ext/nokogiri/html_document.c b/ext/nokogiri/html4_document.c similarity index 88% rename from ext/nokogiri/html_document.c rename to ext/nokogiri/html4_document.c index 7462f854ef..9e9a016957 100644 --- a/ext/nokogiri/html_document.c +++ b/ext/nokogiri/html4_document.c @@ -1,6 +1,6 @@ #include -VALUE cNokogiriHtmlDocument ; +VALUE cNokogiriHtml4Document ; static ID id_encoding_found; static ID id_to_s; @@ -34,7 +34,7 @@ rb_html_document_s_new(int argc, VALUE *argv, VALUE klass) * read_io(io, url, encoding, options) * * Read the HTML document from +io+ with given +url+, +encoding+, - * and +options+. See Nokogiri::HTML.parse + * and +options+. See Nokogiri::HTML4.parse */ static VALUE rb_html_document_s_read_io(VALUE klass, VALUE rb_io, VALUE rb_url, VALUE rb_encoding, VALUE rb_options) @@ -92,7 +92,7 @@ rb_html_document_s_read_io(VALUE klass, VALUE rb_io, VALUE rb_url, VALUE rb_enco * read_memory(string, url, encoding, options) * * Read the HTML document contained in +string+ with given +url+, +encoding+, - * and +options+. See Nokogiri::HTML.parse + * and +options+. 
See Nokogiri::HTML4.parse */ static VALUE rb_html_document_s_read_memory(VALUE klass, VALUE rb_html, VALUE rb_url, VALUE rb_encoding, VALUE rb_options) @@ -153,13 +153,13 @@ void noko_init_html_document() { assert(cNokogiriXmlDocument); - cNokogiriHtmlDocument = rb_define_class_under(mNokogiriHtml, "Document", cNokogiriXmlDocument); + cNokogiriHtml4Document = rb_define_class_under(mNokogiriHtml4, "Document", cNokogiriXmlDocument); - rb_define_singleton_method(cNokogiriHtmlDocument, "read_memory", rb_html_document_s_read_memory, 4); - rb_define_singleton_method(cNokogiriHtmlDocument, "read_io", rb_html_document_s_read_io, 4); - rb_define_singleton_method(cNokogiriHtmlDocument, "new", rb_html_document_s_new, -1); + rb_define_singleton_method(cNokogiriHtml4Document, "read_memory", rb_html_document_s_read_memory, 4); + rb_define_singleton_method(cNokogiriHtml4Document, "read_io", rb_html_document_s_read_io, 4); + rb_define_singleton_method(cNokogiriHtml4Document, "new", rb_html_document_s_new, -1); - rb_define_method(cNokogiriHtmlDocument, "type", rb_html_document_type, 0); + rb_define_method(cNokogiriHtml4Document, "type", rb_html_document_type, 0); id_encoding_found = rb_intern("encoding_found"); id_to_s = rb_intern("to_s"); diff --git a/ext/nokogiri/html_element_description.c b/ext/nokogiri/html4_element_description.c similarity index 75% rename from ext/nokogiri/html_element_description.c rename to ext/nokogiri/html4_element_description.c index 2a5f82ed74..a9ba9f784a 100644 --- a/ext/nokogiri/html_element_description.c +++ b/ext/nokogiri/html4_element_description.c @@ -1,6 +1,6 @@ #include -VALUE cNokogiriHtmlElementDescription ; +VALUE cNokogiriHtml4ElementDescription ; /* * call-seq: @@ -266,27 +266,29 @@ get_description(VALUE klass, VALUE tag_name) ); if (NULL == description) { return Qnil; } - return Data_Wrap_Struct(klass, 0, 0, (void *)(uintptr_t)description); + return Data_Wrap_Struct(klass, 0, 0, DISCARD_CONST_QUAL(void *, description)); } void 
noko_init_html_element_description() { - cNokogiriHtmlElementDescription = rb_define_class_under(mNokogiriHtml, "ElementDescription", rb_cObject); - - rb_define_singleton_method(cNokogiriHtmlElementDescription, "[]", get_description, 1); - - rb_define_method(cNokogiriHtmlElementDescription, "name", name, 0); - rb_define_method(cNokogiriHtmlElementDescription, "implied_start_tag?", implied_start_tag_eh, 0); - rb_define_method(cNokogiriHtmlElementDescription, "implied_end_tag?", implied_end_tag_eh, 0); - rb_define_method(cNokogiriHtmlElementDescription, "save_end_tag?", save_end_tag_eh, 0); - rb_define_method(cNokogiriHtmlElementDescription, "empty?", empty_eh, 0); - rb_define_method(cNokogiriHtmlElementDescription, "deprecated?", deprecated_eh, 0); - rb_define_method(cNokogiriHtmlElementDescription, "inline?", inline_eh, 0); - rb_define_method(cNokogiriHtmlElementDescription, "description", description, 0); - rb_define_method(cNokogiriHtmlElementDescription, "sub_elements", sub_elements, 0); - rb_define_method(cNokogiriHtmlElementDescription, "default_sub_element", default_sub_element, 0); - rb_define_method(cNokogiriHtmlElementDescription, "optional_attributes", optional_attributes, 0); - rb_define_method(cNokogiriHtmlElementDescription, "deprecated_attributes", deprecated_attributes, 0); - rb_define_method(cNokogiriHtmlElementDescription, "required_attributes", required_attributes, 0); + cNokogiriHtml4ElementDescription = rb_define_class_under(mNokogiriHtml4, "ElementDescription", rb_cObject); + + rb_undef_alloc_func(cNokogiriHtml4ElementDescription); + + rb_define_singleton_method(cNokogiriHtml4ElementDescription, "[]", get_description, 1); + + rb_define_method(cNokogiriHtml4ElementDescription, "name", name, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "implied_start_tag?", implied_start_tag_eh, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "implied_end_tag?", implied_end_tag_eh, 0); + rb_define_method(cNokogiriHtml4ElementDescription, 
"save_end_tag?", save_end_tag_eh, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "empty?", empty_eh, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "deprecated?", deprecated_eh, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "inline?", inline_eh, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "description", description, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "sub_elements", sub_elements, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "default_sub_element", default_sub_element, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "optional_attributes", optional_attributes, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "deprecated_attributes", deprecated_attributes, 0); + rb_define_method(cNokogiriHtml4ElementDescription, "required_attributes", required_attributes, 0); } diff --git a/ext/nokogiri/html_entity_lookup.c b/ext/nokogiri/html4_entity_lookup.c similarity index 60% rename from ext/nokogiri/html_entity_lookup.c rename to ext/nokogiri/html4_entity_lookup.c index a63f58a528..ee1589cb43 100644 --- a/ext/nokogiri/html_entity_lookup.c +++ b/ext/nokogiri/html4_entity_lookup.c @@ -1,17 +1,17 @@ #include -static VALUE cNokogiriHtmlEntityLookup; +static VALUE cNokogiriHtml4EntityLookup; /* * call-seq: * get(key) * - * Get the HTML::EntityDescription for +key+ + * Get the HTML4::EntityDescription for +key+ */ static VALUE get(VALUE _, VALUE rb_entity_name) { - VALUE cNokogiriHtmlEntityDescription; + VALUE cNokogiriHtml4EntityDescription; const htmlEntityDesc *c_entity_desc; VALUE rb_constructor_args[3]; @@ -24,14 +24,14 @@ get(VALUE _, VALUE rb_entity_name) rb_constructor_args[1] = NOKOGIRI_STR_NEW2(c_entity_desc->name); rb_constructor_args[2] = NOKOGIRI_STR_NEW2(c_entity_desc->desc); - cNokogiriHtmlEntityDescription = rb_const_get_at(mNokogiriHtml, rb_intern("EntityDescription")); - return rb_class_new_instance(3, rb_constructor_args, cNokogiriHtmlEntityDescription); + 
cNokogiriHtml4EntityDescription = rb_const_get_at(mNokogiriHtml4, rb_intern("EntityDescription")); + return rb_class_new_instance(3, rb_constructor_args, cNokogiriHtml4EntityDescription); } void noko_init_html_entity_lookup() { - cNokogiriHtmlEntityLookup = rb_define_class_under(mNokogiriHtml, "EntityLookup", rb_cObject); + cNokogiriHtml4EntityLookup = rb_define_class_under(mNokogiriHtml4, "EntityLookup", rb_cObject); - rb_define_method(cNokogiriHtmlEntityLookup, "get", get, 1); + rb_define_method(cNokogiriHtml4EntityLookup, "get", get, 1); } diff --git a/ext/nokogiri/html_sax_parser_context.c b/ext/nokogiri/html4_sax_parser_context.c similarity index 83% rename from ext/nokogiri/html_sax_parser_context.c rename to ext/nokogiri/html4_sax_parser_context.c index eb3361b296..54adca4cb4 100644 --- a/ext/nokogiri/html_sax_parser_context.c +++ b/ext/nokogiri/html4_sax_parser_context.c @@ -1,6 +1,6 @@ #include -VALUE cNokogiriHtmlSaxParserContext ; +VALUE cNokogiriHtml4SaxParserContext ; static void deallocate(xmlParserCtxtPtr ctxt) @@ -19,9 +19,8 @@ parse_memory(VALUE klass, VALUE data, VALUE encoding) { htmlParserCtxtPtr ctxt; - if (NIL_P(data)) { - rb_raise(rb_eArgError, "data cannot be nil"); - } + Check_Type(data, T_STRING); + if (!(int)RSTRING_LEN(data)) { rb_raise(rb_eRuntimeError, "data cannot be empty"); } @@ -110,10 +109,11 @@ void noko_init_html_sax_parser_context() { assert(cNokogiriXmlSaxParserContext); - cNokogiriHtmlSaxParserContext = rb_define_class_under(mNokogiriHtmlSax, "ParserContext", cNokogiriXmlSaxParserContext); + cNokogiriHtml4SaxParserContext = rb_define_class_under(mNokogiriHtml4Sax, "ParserContext", + cNokogiriXmlSaxParserContext); - rb_define_singleton_method(cNokogiriHtmlSaxParserContext, "memory", parse_memory, 2); - rb_define_singleton_method(cNokogiriHtmlSaxParserContext, "file", parse_file, 2); + rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "memory", parse_memory, 2); + 
rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "file", parse_file, 2); - rb_define_method(cNokogiriHtmlSaxParserContext, "parse_with", parse_with, 1); + rb_define_method(cNokogiriHtml4SaxParserContext, "parse_with", parse_with, 1); } diff --git a/ext/nokogiri/html_sax_push_parser.c b/ext/nokogiri/html4_sax_push_parser.c similarity index 85% rename from ext/nokogiri/html_sax_push_parser.c rename to ext/nokogiri/html4_sax_push_parser.c index 30f3e18465..9dc4a8c2c2 100644 --- a/ext/nokogiri/html_sax_push_parser.c +++ b/ext/nokogiri/html4_sax_push_parser.c @@ -1,6 +1,6 @@ #include -VALUE cNokogiriHtmlSaxPushParser; +VALUE cNokogiriHtml4SaxPushParser; /* * call-seq: @@ -88,8 +88,8 @@ void noko_init_html_sax_push_parser() { assert(cNokogiriXmlSaxPushParser); - cNokogiriHtmlSaxPushParser = rb_define_class_under(mNokogiriHtmlSax, "PushParser", cNokogiriXmlSaxPushParser); + cNokogiriHtml4SaxPushParser = rb_define_class_under(mNokogiriHtml4Sax, "PushParser", cNokogiriXmlSaxPushParser); - rb_define_private_method(cNokogiriHtmlSaxPushParser, "initialize_native", initialize_native, 3); - rb_define_private_method(cNokogiriHtmlSaxPushParser, "native_write", native_write, 2); + rb_define_private_method(cNokogiriHtml4SaxPushParser, "initialize_native", initialize_native, 3); + rb_define_private_method(cNokogiriHtml4SaxPushParser, "native_write", native_write, 2); } diff --git a/ext/nokogiri/libxml2_backwards_compat.c b/ext/nokogiri/libxml2_backwards_compat.c index 2e084b9235..f5255cb989 100644 --- a/ext/nokogiri/libxml2_backwards_compat.c +++ b/ext/nokogiri/libxml2_backwards_compat.c @@ -20,14 +20,14 @@ xmlFirstElementChild(xmlNodePtr parent) return (NULL); } switch (parent->type) { - case XML_ELEMENT_NODE: - case XML_ENTITY_NODE: - case XML_DOCUMENT_NODE: - case XML_HTML_DOCUMENT_NODE: - cur = parent->children; - break; - default: - return (NULL); + case XML_ELEMENT_NODE: + case XML_ENTITY_NODE: + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + cur = 
parent->children; + break; + default: + return (NULL); } while (cur != NULL) { if (cur->type == XML_ELEMENT_NODE) { @@ -57,20 +57,20 @@ xmlNextElementSibling(xmlNodePtr node) return (NULL); } switch (node->type) { - case XML_ELEMENT_NODE: - case XML_TEXT_NODE: - case XML_CDATA_SECTION_NODE: - case XML_ENTITY_REF_NODE: - case XML_ENTITY_NODE: - case XML_PI_NODE: - case XML_COMMENT_NODE: - case XML_DTD_NODE: - case XML_XINCLUDE_START: - case XML_XINCLUDE_END: - node = node->next; - break; - default: - return (NULL); + case XML_ELEMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_ENTITY_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_DTD_NODE: + case XML_XINCLUDE_START: + case XML_XINCLUDE_END: + node = node->next; + break; + default: + return (NULL); } while (node != NULL) { if (node->type == XML_ELEMENT_NODE) { @@ -101,14 +101,14 @@ xmlLastElementChild(xmlNodePtr parent) return (NULL); } switch (parent->type) { - case XML_ELEMENT_NODE: - case XML_ENTITY_NODE: - case XML_DOCUMENT_NODE: - case XML_HTML_DOCUMENT_NODE: - cur = parent->last; - break; - default: - return (NULL); + case XML_ELEMENT_NODE: + case XML_ENTITY_NODE: + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + cur = parent->last; + break; + default: + return (NULL); } while (cur != NULL) { if (cur->type == XML_ELEMENT_NODE) { diff --git a/ext/nokogiri/nokogiri.c b/ext/nokogiri/nokogiri.c index fb255f03e3..b82329cccf 100644 --- a/ext/nokogiri/nokogiri.c +++ b/ext/nokogiri/nokogiri.c @@ -1,8 +1,10 @@ #include VALUE mNokogiri ; -VALUE mNokogiriHtml ; -VALUE mNokogiriHtmlSax ; +VALUE mNokogiriGumbo ; +VALUE mNokogiriHtml4 ; +VALUE mNokogiriHtml4Sax ; +VALUE mNokogiriHtml5 ; VALUE mNokogiriXml ; VALUE mNokogiriXmlSax ; VALUE mNokogiriXmlXpath ; @@ -13,38 +15,39 @@ VALUE cNokogiriXmlCharacterData; VALUE cNokogiriXmlElement; VALUE cNokogiriXmlXpathSyntaxError; -void noko_init_xml_attr(); -void noko_init_xml_attribute_decl(); -void 
noko_init_xml_cdata(); -void noko_init_xml_comment(); -void noko_init_xml_document(); -void noko_init_xml_document_fragment(); -void noko_init_xml_dtd(); -void noko_init_xml_element_content(); -void noko_init_xml_element_decl(); -void noko_init_xml_encoding_handler(); -void noko_init_xml_entity_decl(); -void noko_init_xml_entity_reference(); -void noko_init_xml_namespace(); -void noko_init_xml_node(); -void noko_init_xml_node_set(); -void noko_init_xml_processing_instruction(); -void noko_init_xml_reader(); -void noko_init_xml_relax_ng(); -void noko_init_xml_sax_parser(); -void noko_init_xml_sax_parser_context(); -void noko_init_xml_sax_push_parser(); -void noko_init_xml_schema(); -void noko_init_xml_syntax_error(); -void noko_init_xml_text(); -void noko_init_xml_xpath_context(); -void noko_init_xslt_stylesheet(); -void noko_init_html_document(); -void noko_init_html_element_description(); -void noko_init_html_entity_lookup(); -void noko_init_html_sax_parser_context(); -void noko_init_html_sax_push_parser(); -void noko_init_test_global_handlers(); +void noko_init_xml_attr(void); +void noko_init_xml_attribute_decl(void); +void noko_init_xml_cdata(void); +void noko_init_xml_comment(void); +void noko_init_xml_document(void); +void noko_init_xml_document_fragment(void); +void noko_init_xml_dtd(void); +void noko_init_xml_element_content(void); +void noko_init_xml_element_decl(void); +void noko_init_xml_encoding_handler(void); +void noko_init_xml_entity_decl(void); +void noko_init_xml_entity_reference(void); +void noko_init_xml_namespace(void); +void noko_init_xml_node(void); +void noko_init_xml_node_set(void); +void noko_init_xml_processing_instruction(void); +void noko_init_xml_reader(void); +void noko_init_xml_relax_ng(void); +void noko_init_xml_sax_parser(void); +void noko_init_xml_sax_parser_context(void); +void noko_init_xml_sax_push_parser(void); +void noko_init_xml_schema(void); +void noko_init_xml_syntax_error(void); +void noko_init_xml_text(void); +void 
noko_init_xml_xpath_context(void); +void noko_init_xslt_stylesheet(void); +void noko_init_html_document(void); +void noko_init_html_element_description(void); +void noko_init_html_entity_lookup(void); +void noko_init_html_sax_parser_context(void); +void noko_init_html_sax_push_parser(void); +void noko_init_gumbo(void); +void noko_init_test_global_handlers(void); static ID id_read, id_write; @@ -152,12 +155,14 @@ void Init_nokogiri() { mNokogiri = rb_define_module("Nokogiri"); + mNokogiriGumbo = rb_define_module_under(mNokogiri, "Gumbo"); + mNokogiriHtml4 = rb_define_module_under(mNokogiri, "HTML4"); + mNokogiriHtml4Sax = rb_define_module_under(mNokogiriHtml4, "SAX"); + mNokogiriHtml5 = rb_define_module_under(mNokogiri, "HTML5"); mNokogiriXml = rb_define_module_under(mNokogiri, "XML"); - mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML"); - mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT"); - mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath"); mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX"); - mNokogiriHtmlSax = rb_define_module_under(mNokogiriHtml, "SAX"); + mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath"); + mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT"); rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION)); rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion)); @@ -191,9 +196,35 @@ Init_nokogiri() rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS)); #endif +#if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES) + /* + * We choose *not* to do use Ruby's memory management functions with windows DLLs because of this + * issue in libxml 2.9.12: + * + * https://github.com/sparklemotion/nokogiri/issues/2241 + * + * If the atexit() issue gets fixed in a future version of libxml2, then we may be able to skip + * this config only for the 
specific libxml2 versions 2.9.12. + * + * Alternatively, now that Ruby has a generational GC, it might be OK to let libxml2 use its + * default memory management functions (recall that this config was introduced to reduce memory + * bloat and allow Ruby to GC more often); but we should *really* test with production workloads + * before making that kind of a potentially-invasive change. + */ + rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default")); +#else + rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby")); xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup); +#endif xmlInitParser(); + exsltRegisterAll(); + + if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) { + rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue); + } else { + rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qfalse); + } cNokogiriSyntaxError = rb_define_class_under(mNokogiri, "SyntaxError", rb_eStandardError); noko_init_xml_syntax_error(); @@ -238,6 +269,7 @@ Init_nokogiri() noko_init_xml_document_fragment(); noko_init_xml_document(); noko_init_html_document(); + noko_init_gumbo(); noko_init_test_global_handlers(); diff --git a/ext/nokogiri/nokogiri.h b/ext/nokogiri/nokogiri.h index 8b3cc9e845..399967ebd0 100644 --- a/ext/nokogiri/nokogiri.h +++ b/ext/nokogiri/nokogiri.h @@ -1,7 +1,7 @@ #ifndef NOKOGIRI_NATIVE #define NOKOGIRI_NATIVE -#if _MSC_VER +#ifdef _MSC_VER # ifndef WIN32_LEAN_AND_MEAN # define WIN32_LEAN_AND_MEAN # endif /* WIN32_LEAN_AND_MEAN */ @@ -15,7 +15,7 @@ # include #endif -#if _WIN32 +#ifdef _WIN32 # define NOKOPUBFUN __declspec(dllexport) # define NOKOPUBVAR __declspec(dllexport) extern #else @@ -69,6 +69,7 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent); #include #include #include +#include #define NOKOGIRI_STR_NEW2(str) NOKOGIRI_STR_NEW(str, strlen((const char *)(str))) 
#define NOKOGIRI_STR_NEW(str, len) rb_external_str_new_with_enc((const char *)(str), (long)(len), rb_utf8_encoding()) @@ -92,12 +93,16 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent); NOKOPUBVAR VALUE mNokogiri ; -NOKOPUBVAR VALUE mNokogiriHtml ; -NOKOPUBVAR VALUE mNokogiriHtmlSax ; +NOKOPUBVAR VALUE mNokogiriGumbo ; +NOKOPUBVAR VALUE mNokogiriHtml4 ; +NOKOPUBVAR VALUE mNokogiriHtml4Sax ; +NOKOPUBVAR VALUE mNokogiriHtml5 ; NOKOPUBVAR VALUE mNokogiriXml ; NOKOPUBVAR VALUE mNokogiriXmlSax ; +NOKOPUBVAR VALUE mNokogiriXmlXpath ; NOKOPUBVAR VALUE mNokogiriXslt ; +NOKOPUBVAR VALUE cNokogiriEncodingHandler; NOKOPUBVAR VALUE cNokogiriSyntaxError; NOKOPUBVAR VALUE cNokogiriXmlAttr; NOKOPUBVAR VALUE cNokogiriXmlAttributeDecl; @@ -128,10 +133,11 @@ NOKOPUBVAR VALUE cNokogiriXmlXpathContext; NOKOPUBVAR VALUE cNokogiriXmlXpathSyntaxError; NOKOPUBVAR VALUE cNokogiriXsltStylesheet ; -NOKOPUBVAR VALUE cNokogiriHtmlDocument ; -NOKOPUBVAR VALUE cNokogiriHtmlSaxPushParser ; -NOKOPUBVAR VALUE cNokogiriHtmlElementDescription ; -NOKOPUBVAR VALUE cNokogiriHtmlSaxParserContext; +NOKOPUBVAR VALUE cNokogiriHtml4Document ; +NOKOPUBVAR VALUE cNokogiriHtml4SaxPushParser ; +NOKOPUBVAR VALUE cNokogiriHtml4ElementDescription ; +NOKOPUBVAR VALUE cNokogiriHtml4SaxParserContext; +NOKOPUBVAR VALUE cNokogiriHtml5Document ; typedef struct _nokogiriTuple { VALUE doc; @@ -164,6 +170,9 @@ int noko_io_read(void *ctx, char *buffer, int len); int noko_io_write(void *ctx, char *buffer, int len); int noko_io_close(void *ctx); +#define Noko_Node_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj)) +#define Noko_Namespace_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj)) + VALUE noko_xml_node_wrap(VALUE klass, xmlNodePtr node) ; VALUE noko_xml_node_wrap_node_set_result(xmlNodePtr node, VALUE node_set) ; VALUE noko_xml_node_attrs(xmlNodePtr node) ; @@ -177,7 +186,8 @@ VALUE noko_xml_node_set_wrap(xmlNodeSetPtr node_set, VALUE document) ; VALUE noko_xml_document_wrap_with_init_args(VALUE klass, 
xmlDocPtr doc, int argc, VALUE *argv); VALUE noko_xml_document_wrap(VALUE klass, xmlDocPtr doc); -NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */ +NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass, + xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */ #define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private)) #define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc) @@ -190,6 +200,15 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc); /* depr #define NOKOGIRI_SAX_TUPLE_NEW(_ctxt, _self) nokogiri_sax_tuple_new(_ctxt, _self) #define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) free(_tuple) +#define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v)) +#define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v) + +#if HAVE_RB_CATEGORY_WARNING +# define NOKO_WARN_DEPRECATION(message) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message) +#else +# define NOKO_WARN_DEPRECATION(message) rb_warning(message) +#endif + void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state); void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data, xmlStructuredErrorFunc handler); diff --git a/ext/nokogiri/xml_attr.c b/ext/nokogiri/xml_attr.c index 63e5498b53..9fbda17320 100644 --- a/ext/nokogiri/xml_attr.c +++ b/ext/nokogiri/xml_attr.c @@ -16,7 +16,7 @@ set_value(VALUE self, VALUE content) xmlChar *value; xmlNode *cur; - Data_Get_Struct(self, xmlAttr, attr); + Noko_Node_Get_Struct(self, xmlAttr, attr); if (attr->children) { xmlFreeNodeList(attr->children); @@ -68,7 +68,7 @@ new (int argc, VALUE *argv, VALUE klass) rb_raise(rb_eArgError, "parameter must be a Nokogiri::XML::Document"); } - Data_Get_Struct(document, xmlDoc, xml_doc); + Noko_Node_Get_Struct(document, xmlDoc, xml_doc); node = xmlNewDocProp( xml_doc, diff --git a/ext/nokogiri/xml_attribute_decl.c 
b/ext/nokogiri/xml_attribute_decl.c index 8603ff9553..951e393ce1 100644 --- a/ext/nokogiri/xml_attribute_decl.c +++ b/ext/nokogiri/xml_attribute_decl.c @@ -12,7 +12,7 @@ static VALUE attribute_type(VALUE self) { xmlAttributePtr node; - Data_Get_Struct(self, xmlAttribute, node); + Noko_Node_Get_Struct(self, xmlAttribute, node); return INT2NUM((long)node->atype); } @@ -26,7 +26,7 @@ static VALUE default_value(VALUE self) { xmlAttributePtr node; - Data_Get_Struct(self, xmlAttribute, node); + Noko_Node_Get_Struct(self, xmlAttribute, node); if (node->defaultValue) { return NOKOGIRI_STR_NEW2(node->defaultValue); } return Qnil; @@ -45,7 +45,7 @@ enumeration(VALUE self) xmlEnumerationPtr enm; VALUE list; - Data_Get_Struct(self, xmlAttribute, node); + Noko_Node_Get_Struct(self, xmlAttribute, node); list = rb_ary_new(); enm = node->tree; diff --git a/ext/nokogiri/xml_cdata.c b/ext/nokogiri/xml_cdata.c index 954f3ab3e3..6ca23a9eef 100644 --- a/ext/nokogiri/xml_cdata.c +++ b/ext/nokogiri/xml_cdata.c @@ -25,7 +25,7 @@ new (int argc, VALUE *argv, VALUE klass) rb_scan_args(argc, argv, "2*", &doc, &content, &rest); - Data_Get_Struct(doc, xmlDoc, xml_doc); + Noko_Node_Get_Struct(doc, xmlDoc, xml_doc); if (!NIL_P(content)) { content_str = (xmlChar *)StringValuePtr(content); diff --git a/ext/nokogiri/xml_document.c b/ext/nokogiri/xml_document.c index c76842ef57..f79ee137bf 100644 --- a/ext/nokogiri/xml_document.c +++ b/ext/nokogiri/xml_document.c @@ -6,19 +6,19 @@ static int dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc) { switch (node->type) { - case XML_ATTRIBUTE_NODE: - xmlFreePropList((xmlAttrPtr)node); - break; - case XML_NAMESPACE_DECL: - xmlFreeNs((xmlNsPtr)node); - break; - case XML_DTD_NODE: - xmlFreeDtd((xmlDtdPtr)node); - break; - default: - if (node->parent == NULL) { - xmlAddChild((xmlNodePtr)doc, node); - } + case XML_ATTRIBUTE_NODE: + xmlFreePropList((xmlAttrPtr)node); + break; + case XML_NAMESPACE_DECL: + xmlFreeNs((xmlNsPtr)node); + break; + case 
XML_DTD_NODE: + xmlFreeDtd((xmlDtdPtr)node); + break; + default: + if (node->parent == NULL) { + xmlAddChild((xmlNodePtr)doc, node); + } } return ST_CONTINUE; } @@ -104,7 +104,11 @@ recursively_remove_namespaces_from_node(xmlNodePtr node) (node->type == XML_XINCLUDE_START) || (node->type == XML_XINCLUDE_END)) && node->nsDef) { - xmlFreeNsList(node->nsDef); + xmlNsPtr curr = node->nsDef; + while (curr) { + noko_xml_document_pin_namespace(curr, node->doc); + curr = curr->next; + } node->nsDef = NULL; } @@ -161,7 +165,7 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root) rb_obj_class(rb_new_root)); } - Data_Get_Struct(rb_new_root, xmlNode, c_new_root); + Noko_Node_Get_Struct(rb_new_root, xmlNode, c_new_root); /* If the new root's document is not the same as the current document, * then we need to dup the node in to this document. */ @@ -213,7 +217,7 @@ set_encoding(VALUE self, VALUE encoding) Data_Get_Struct(self, xmlDoc, doc); if (doc->encoding) { - free((char *)(uintptr_t) doc->encoding); /* avoid gcc cast warning */ + xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding)); } doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding)); @@ -533,59 +537,59 @@ block_caller(void *ctx, xmlNodePtr c_node, xmlNodePtr c_parent_node) static VALUE rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self) { - VALUE mode; - VALUE incl_ns; - VALUE with_comments; - xmlChar **ns; - long ns_len, i; + VALUE rb_mode; + VALUE rb_namespaces; + VALUE rb_comments_p; + xmlChar **c_namespaces; - xmlDocPtr doc; - xmlOutputBufferPtr buf; - xmlC14NIsVisibleCallback cb = NULL; - void *ctx = NULL; + xmlDocPtr c_doc; + xmlOutputBufferPtr c_obuf; + xmlC14NIsVisibleCallback c_callback_wrapper = NULL; + void *rb_callback = NULL; VALUE rb_cStringIO; - VALUE io; + VALUE rb_io; - rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments); + rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p); + if (!NIL_P(rb_mode)) { Check_Type(rb_mode, T_FIXNUM); } + if 
(!NIL_P(rb_namespaces)) { Check_Type(rb_namespaces, T_ARRAY); } - Data_Get_Struct(self, xmlDoc, doc); + Data_Get_Struct(self, xmlDoc, c_doc); rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO")); - io = rb_class_new_instance(0, 0, rb_cStringIO); - buf = xmlAllocOutputBuffer(NULL); + rb_io = rb_class_new_instance(0, 0, rb_cStringIO); + c_obuf = xmlAllocOutputBuffer(NULL); - buf->writecallback = (xmlOutputWriteCallback)noko_io_write; - buf->closecallback = (xmlOutputCloseCallback)noko_io_close; - buf->context = (void *)io; + c_obuf->writecallback = (xmlOutputWriteCallback)noko_io_write; + c_obuf->closecallback = (xmlOutputCloseCallback)noko_io_close; + c_obuf->context = (void *)rb_io; if (rb_block_given_p()) { - cb = block_caller; - ctx = (void *)rb_block_proc(); + c_callback_wrapper = block_caller; + rb_callback = (void *)rb_block_proc(); } - if (NIL_P(incl_ns)) { - ns = NULL; + if (NIL_P(rb_namespaces)) { + c_namespaces = NULL; } else { - Check_Type(incl_ns, T_ARRAY); - ns_len = RARRAY_LEN(incl_ns); - ns = calloc((size_t)ns_len + 1, sizeof(xmlChar *)); - for (i = 0 ; i < ns_len ; i++) { - VALUE entry = rb_ary_entry(incl_ns, i); - ns[i] = (xmlChar *)StringValueCStr(entry); + long ns_len = RARRAY_LEN(rb_namespaces); + c_namespaces = calloc((size_t)ns_len + 1, sizeof(xmlChar *)); + for (int j = 0 ; j < ns_len ; j++) { + VALUE entry = rb_ary_entry(rb_namespaces, j); + c_namespaces[j] = (xmlChar *)StringValueCStr(entry); } } + xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback, + (int)(NIL_P(rb_mode) ? 0 : NUM2INT(rb_mode)), + c_namespaces, + (int)RTEST(rb_comments_p), + c_obuf); - xmlC14NExecute(doc, cb, ctx, - (int)(NIL_P(mode) ? 
0 : NUM2INT(mode)), - ns, - (int) RTEST(with_comments), - buf); - - xmlOutputBufferClose(buf); + free(c_namespaces); + xmlOutputBufferClose(c_obuf); - return rb_funcall(io, rb_intern("string"), 0); + return rb_funcall(rb_io, rb_intern("string"), 0); } VALUE diff --git a/ext/nokogiri/xml_document_fragment.c b/ext/nokogiri/xml_document_fragment.c index f509815fda..81639576d7 100644 --- a/ext/nokogiri/xml_document_fragment.c +++ b/ext/nokogiri/xml_document_fragment.c @@ -28,8 +28,6 @@ new (int argc, VALUE *argv, VALUE klass) rb_node = noko_xml_node_wrap(klass, node); rb_obj_call_init(rb_node, argc, argv); - if (rb_block_given_p()) { rb_yield(rb_node); } - return rb_node; } diff --git a/ext/nokogiri/xml_dtd.c b/ext/nokogiri/xml_dtd.c index f9cb71fea6..05be753b51 100644 --- a/ext/nokogiri/xml_dtd.c +++ b/ext/nokogiri/xml_dtd.c @@ -44,7 +44,7 @@ entities(VALUE self) xmlDtdPtr dtd; VALUE hash; - Data_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(self, xmlDtd, dtd); if (!dtd->entities) { return Qnil; } @@ -57,9 +57,9 @@ entities(VALUE self) /* * call-seq: - * notations + * notations() → Hash * - * Get a hash of the notations for this DTD. + * [Returns] All the notations for this DTD in a Hash of Notation +name+ to Notation. 
*/ static VALUE notations(VALUE self) @@ -67,7 +67,7 @@ notations(VALUE self) xmlDtdPtr dtd; VALUE hash; - Data_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(self, xmlDtd, dtd); if (!dtd->notations) { return Qnil; } @@ -90,7 +90,7 @@ attributes(VALUE self) xmlDtdPtr dtd; VALUE hash; - Data_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(self, xmlDtd, dtd); hash = rb_hash_new(); @@ -113,7 +113,7 @@ elements(VALUE self) xmlDtdPtr dtd; VALUE hash; - Data_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(self, xmlDtd, dtd); if (!dtd->elements) { return Qnil; } @@ -138,8 +138,8 @@ validate(VALUE self, VALUE document) xmlValidCtxtPtr ctxt; VALUE error_list; - Data_Get_Struct(self, xmlDtd, dtd); - Data_Get_Struct(document, xmlDoc, doc); + Noko_Node_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(document, xmlDoc, doc); error_list = rb_ary_new(); ctxt = xmlNewValidCtxt(); @@ -165,7 +165,7 @@ static VALUE system_id(VALUE self) { xmlDtdPtr dtd; - Data_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(self, xmlDtd, dtd); if (!dtd->SystemID) { return Qnil; } @@ -182,7 +182,7 @@ static VALUE external_id(VALUE self) { xmlDtdPtr dtd; - Data_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(self, xmlDtd, dtd); if (!dtd->ExternalID) { return Qnil; } diff --git a/ext/nokogiri/xml_element_content.c b/ext/nokogiri/xml_element_content.c index dc4debb290..64176f0a1f 100644 --- a/ext/nokogiri/xml_element_content.c +++ b/ext/nokogiri/xml_element_content.c @@ -116,6 +116,8 @@ noko_init_xml_element_content() { cNokogiriXmlElementContent = rb_define_class_under(mNokogiriXml, "ElementContent", rb_cObject); + rb_undef_alloc_func(cNokogiriXmlElementContent); + rb_define_method(cNokogiriXmlElementContent, "name", get_name, 0); rb_define_method(cNokogiriXmlElementContent, "type", get_type, 0); rb_define_method(cNokogiriXmlElementContent, "occur", get_occur, 0); diff --git a/ext/nokogiri/xml_element_decl.c b/ext/nokogiri/xml_element_decl.c index 
178c69b80d..392a300372 100644 --- a/ext/nokogiri/xml_element_decl.c +++ b/ext/nokogiri/xml_element_decl.c @@ -14,7 +14,7 @@ static VALUE element_type(VALUE self) { xmlElementPtr node; - Data_Get_Struct(self, xmlElement, node); + Noko_Node_Get_Struct(self, xmlElement, node); return INT2NUM((long)node->etype); } @@ -28,7 +28,7 @@ static VALUE content(VALUE self) { xmlElementPtr node; - Data_Get_Struct(self, xmlElement, node); + Noko_Node_Get_Struct(self, xmlElement, node); if (!node->content) { return Qnil; } @@ -48,7 +48,7 @@ static VALUE prefix(VALUE self) { xmlElementPtr node; - Data_Get_Struct(self, xmlElement, node); + Noko_Node_Get_Struct(self, xmlElement, node); if (!node->prefix) { return Qnil; } diff --git a/ext/nokogiri/xml_encoding_handler.c b/ext/nokogiri/xml_encoding_handler.c index 0202b3c27f..8d5b88ecd5 100644 --- a/ext/nokogiri/xml_encoding_handler.c +++ b/ext/nokogiri/xml_encoding_handler.c @@ -1,69 +1,84 @@ #include +VALUE cNokogiriEncodingHandler; + + +static void +_xml_encoding_handler_dealloc(xmlCharEncodingHandlerPtr c_handler) +{ + /* make sure iconv handlers are cleaned up and freed */ + xmlCharEncCloseFunc(c_handler); +} + + /* * call-seq: Nokogiri::EncodingHandler.[](name) * * Get the encoding handler for +name+ */ static VALUE -get(VALUE klass, VALUE key) +rb_xml_encoding_handler_s_get(VALUE klass, VALUE key) { xmlCharEncodingHandlerPtr handler; handler = xmlFindCharEncodingHandler(StringValueCStr(key)); if (handler) { - return Data_Wrap_Struct(klass, NULL, NULL, handler); + return Data_Wrap_Struct(klass, NULL, _xml_encoding_handler_dealloc, handler); } return Qnil; } + /* * call-seq: Nokogiri::EncodingHandler.delete(name) * * Delete the encoding alias named +name+ */ static VALUE -delete (VALUE klass, VALUE name) +rb_xml_encoding_handler_s_delete(VALUE klass, VALUE name) { if (xmlDelEncodingAlias(StringValueCStr(name))) { return Qnil; } return Qtrue; } + /* * call-seq: Nokogiri::EncodingHandler.alias(from, to) * * Alias encoding handler 
with name +from+ to name +to+ */ static VALUE -alias(VALUE klass, VALUE from, VALUE to) +rb_xml_encoding_handler_s_alias(VALUE klass, VALUE from, VALUE to) { xmlAddEncodingAlias(StringValueCStr(from), StringValueCStr(to)); return to; } + /* * call-seq: Nokogiri::EncodingHandler.clear_aliases! * * Remove all encoding aliases. */ static VALUE -clear_aliases(VALUE klass) +rb_xml_encoding_handler_s_clear_aliases(VALUE klass) { xmlCleanupEncodingAliases(); return klass; } + /* * call-seq: name * * Get the name of this EncodingHandler */ static VALUE -name(VALUE self) +rb_xml_encoding_handler_name(VALUE self) { xmlCharEncodingHandlerPtr handler; @@ -72,14 +87,18 @@ name(VALUE self) return NOKOGIRI_STR_NEW2(handler->name); } + void noko_init_xml_encoding_handler() { - VALUE klass = rb_define_class_under(mNokogiri, "EncodingHandler", rb_cObject); + cNokogiriEncodingHandler = rb_define_class_under(mNokogiri, "EncodingHandler", rb_cObject); + + rb_undef_alloc_func(cNokogiriEncodingHandler); + + rb_define_singleton_method(cNokogiriEncodingHandler, "[]", rb_xml_encoding_handler_s_get, 1); + rb_define_singleton_method(cNokogiriEncodingHandler, "delete", rb_xml_encoding_handler_s_delete, 1); + rb_define_singleton_method(cNokogiriEncodingHandler, "alias", rb_xml_encoding_handler_s_alias, 2); + rb_define_singleton_method(cNokogiriEncodingHandler, "clear_aliases!", rb_xml_encoding_handler_s_clear_aliases, 0); - rb_define_singleton_method(klass, "[]", get, 1); - rb_define_singleton_method(klass, "delete", delete, 1); - rb_define_singleton_method(klass, "alias", alias, 2); - rb_define_singleton_method(klass, "clear_aliases!", clear_aliases, 0); - rb_define_method(klass, "name", name, 0); + rb_define_method(cNokogiriEncodingHandler, "name", rb_xml_encoding_handler_name, 0); } diff --git a/ext/nokogiri/xml_entity_decl.c b/ext/nokogiri/xml_entity_decl.c index 50893d2da1..846509abe8 100644 --- a/ext/nokogiri/xml_entity_decl.c +++ b/ext/nokogiri/xml_entity_decl.c @@ -12,7 +12,7 @@ static 
VALUE original_content(VALUE self) { xmlEntityPtr node; - Data_Get_Struct(self, xmlEntity, node); + Noko_Node_Get_Struct(self, xmlEntity, node); if (!node->orig) { return Qnil; } @@ -29,7 +29,7 @@ static VALUE get_content(VALUE self) { xmlEntityPtr node; - Data_Get_Struct(self, xmlEntity, node); + Noko_Node_Get_Struct(self, xmlEntity, node); if (!node->content) { return Qnil; } @@ -46,7 +46,7 @@ static VALUE entity_type(VALUE self) { xmlEntityPtr node; - Data_Get_Struct(self, xmlEntity, node); + Noko_Node_Get_Struct(self, xmlEntity, node); return INT2NUM((int)node->etype); } @@ -61,7 +61,7 @@ static VALUE external_id(VALUE self) { xmlEntityPtr node; - Data_Get_Struct(self, xmlEntity, node); + Noko_Node_Get_Struct(self, xmlEntity, node); if (!node->ExternalID) { return Qnil; } @@ -78,7 +78,7 @@ static VALUE system_id(VALUE self) { xmlEntityPtr node; - Data_Get_Struct(self, xmlEntity, node); + Noko_Node_Get_Struct(self, xmlEntity, node); if (!node->SystemID) { return Qnil; } diff --git a/ext/nokogiri/xml_namespace.c b/ext/nokogiri/xml_namespace.c index f15bd57700..52b49c5207 100644 --- a/ext/nokogiri/xml_namespace.c +++ b/ext/nokogiri/xml_namespace.c @@ -25,23 +25,55 @@ VALUE cNokogiriXmlNamespace ; static void -dealloc_namespace(xmlNsPtr ns) +_xml_namespace_dealloc(void *ptr) { /* * this deallocator is only used for namespace nodes that are part of an xpath * node set. see noko_xml_namespace_wrap(). 
*/ + xmlNsPtr ns = ptr; NOKOGIRI_DEBUG_START(ns) ; + if (ns->href) { - xmlFree((xmlChar *)(uintptr_t)ns->href); + xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->href)); } if (ns->prefix) { - xmlFree((xmlChar *)(uintptr_t)ns->prefix); + xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->prefix)); } xmlFree(ns); NOKOGIRI_DEBUG_END(ns) ; } +#ifdef HAVE_RB_GC_LOCATION +static void +_xml_namespace_update_references(void *ptr) +{ + xmlNsPtr ns = ptr; + if (ns->_private) { + ns->_private = (void *)rb_gc_location((VALUE)ns->_private); + } +} +#else +# define _xml_namespace_update_references 0 +#endif + +static const rb_data_type_t nokogiri_xml_namespace_type_with_dealloc = { + "Nokogiri/XMLNamespace/WithDealloc", + {0, _xml_namespace_dealloc, 0, _xml_namespace_update_references}, + 0, 0, +#ifdef RUBY_TYPED_FREE_IMMEDIATELY + RUBY_TYPED_FREE_IMMEDIATELY, +#endif +}; + +static const rb_data_type_t nokogiri_xml_namespace_type_without_dealloc = { + "Nokogiri/XMLNamespace/WithoutDealloc", + {0, 0, 0, _xml_namespace_update_references}, + 0, 0, +#ifdef RUBY_TYPED_FREE_IMMEDIATELY + RUBY_TYPED_FREE_IMMEDIATELY, +#endif +}; /* * call-seq: @@ -54,7 +86,7 @@ prefix(VALUE self) { xmlNsPtr ns; - Data_Get_Struct(self, xmlNs, ns); + Noko_Namespace_Get_Struct(self, xmlNs, ns); if (!ns->prefix) { return Qnil; } return NOKOGIRI_STR_NEW2(ns->prefix); @@ -71,7 +103,7 @@ href(VALUE self) { xmlNsPtr ns; - Data_Get_Struct(self, xmlNs, ns); + Noko_Namespace_Get_Struct(self, xmlNs, ns); if (!ns->href) { return Qnil; } return NOKOGIRI_STR_NEW2(ns->href); @@ -87,14 +119,18 @@ noko_xml_namespace_wrap(xmlNsPtr c_namespace, xmlDocPtr c_document) } if (c_document) { - rb_namespace = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, 0, c_namespace); + rb_namespace = TypedData_Wrap_Struct(cNokogiriXmlNamespace, + &nokogiri_xml_namespace_type_without_dealloc, + c_namespace); if (DOC_RUBY_OBJECT_TEST(c_document)) { rb_iv_set(rb_namespace, "@document", DOC_RUBY_OBJECT(c_document)); rb_ary_push(DOC_NODE_CACHE(c_document), 
rb_namespace); } } else { - rb_namespace = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, dealloc_namespace, c_namespace); + rb_namespace = TypedData_Wrap_Struct(cNokogiriXmlNamespace, + &nokogiri_xml_namespace_type_with_dealloc, + c_namespace); } c_namespace->_private = (void *)rb_namespace; @@ -113,6 +149,8 @@ noko_init_xml_namespace() { cNokogiriXmlNamespace = rb_define_class_under(mNokogiriXml, "Namespace", rb_cObject); + rb_undef_alloc_func(cNokogiriXmlNamespace); + rb_define_method(cNokogiriXmlNamespace, "prefix", prefix, 0); rb_define_method(cNokogiriXmlNamespace, "href", href, 0); } diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 93e37c35a6..01b5602b65 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -1,23 +1,30 @@ #include -VALUE cNokogiriXmlNode ; +// :stopdoc: +VALUE cNokogiriXmlNode ; static ID id_decorate, id_decorate_bang; +typedef xmlNodePtr(*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr); + #ifdef DEBUG static void -debug_node_dealloc(xmlNodePtr x) +_xml_node_dealloc(xmlNodePtr x) { NOKOGIRI_DEBUG_START(x) NOKOGIRI_DEBUG_END(x) } #else -# define debug_node_dealloc 0 +# define _xml_node_dealloc 0 #endif static void -mark(xmlNodePtr node) +_xml_node_mark(xmlNodePtr node) { + if (!DOC_RUBY_OBJECT_TEST(node->doc)) { + return; + } + xmlDocPtr doc = node->doc; if (doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE) { if (DOC_RUBY_OBJECT_TEST(doc)) { @@ -28,14 +35,37 @@ mark(xmlNodePtr node) } } -/* :nodoc: */ -typedef xmlNodePtr(*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr); +#ifdef HAVE_RB_GC_LOCATION +static void +_xml_node_update_references(xmlNodePtr node) +{ + if (node->_private) { + node->_private = (void *)rb_gc_location((VALUE)node->_private); + } +} +#endif + +typedef void (*gc_callback_t)(void *); + +static const rb_data_type_t nokogiri_node_type = { + "Nokogiri/XMLNode", + { + (gc_callback_t)_xml_node_mark, (gc_callback_t)_xml_node_dealloc, 0, +#ifdef HAVE_RB_GC_LOCATION + 
(gc_callback_t)_xml_node_update_references +#endif + }, + 0, 0, +#ifdef RUBY_TYPED_FREE_IMMEDIATELY + RUBY_TYPED_FREE_IMMEDIATELY, +#endif +}; -/* :nodoc: */ static void relink_namespace(xmlNodePtr reparented) { xmlNodePtr child; + xmlAttrPtr attr; if (reparented->type != XML_ATTRIBUTE_NODE && reparented->type != XML_ELEMENT_NODE) { return; } @@ -69,7 +99,9 @@ relink_namespace(xmlNodePtr reparented) if (reparented->type != XML_ELEMENT_NODE || !reparented->parent) { return; } /* Make sure that our reparented node has the correct namespaces */ - if (!reparented->ns && reparented->doc != (xmlDocPtr)reparented->parent) { + if (!reparented->ns && + (reparented->doc != (xmlDocPtr)reparented->parent) && + (rb_iv_get(DOC_RUBY_OBJECT(reparented->doc), "@namespace_inheritance") == Qtrue)) { xmlSetNs(reparented, reparented->parent->ns); } @@ -132,15 +164,17 @@ relink_namespace(xmlNodePtr reparented) } if (reparented->type == XML_ELEMENT_NODE) { - child = (xmlNodePtr)((xmlElementPtr)reparented)->attributes; - while (NULL != child) { - relink_namespace(child); - child = child->next; + attr = reparented->properties; + while (NULL != attr) { + relink_namespace((xmlNodePtr)attr); + attr = attr->next; } } } -/* :nodoc: */ + +/* internal function meant to wrap xmlReplaceNode + and fix some issues we have with libxml2 merging nodes */ static xmlNodePtr xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node) { @@ -165,12 +199,23 @@ xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node) return retval ; } -/* :nodoc: */ + +static void +raise_if_ancestor_of_self(xmlNodePtr self) +{ + for (xmlNodePtr ancestor = self->parent ; ancestor ; ancestor = ancestor->parent) { + if (self == ancestor) { + rb_raise(rb_eRuntimeError, "cycle detected: node '%s' is an ancestor of itself", self->name); + } + } +} + + static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf) { VALUE reparented_obj ; - xmlNodePtr reparentee, pivot, reparented, next_text, 
new_next_text, parent ; + xmlNodePtr reparentee, original_reparentee, pivot, reparented, next_text, new_next_text, parent ; int original_ns_prefix_is_default = 0 ; if (!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode)) { @@ -180,8 +225,8 @@ reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node"); } - Data_Get_Struct(reparentee_obj, xmlNode, reparentee); - Data_Get_Struct(pivot_obj, xmlNode, pivot); + Noko_Node_Get_Struct(reparentee_obj, xmlNode, reparentee); + Noko_Node_Get_Struct(pivot_obj, xmlNode, pivot); /* * Check if nodes given are appropriate to have a parent-child @@ -197,66 +242,66 @@ reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func if (parent) { switch (parent->type) { - case XML_DOCUMENT_NODE: - case XML_HTML_DOCUMENT_NODE: - switch (reparentee->type) { - case XML_ELEMENT_NODE: - case XML_PI_NODE: - case XML_COMMENT_NODE: - case XML_DOCUMENT_TYPE_NODE: - /* - * The DOM specification says no to adding text-like nodes - * directly to a document, but we allow it for compatibility. - */ - case XML_TEXT_NODE: - case XML_CDATA_SECTION_NODE: - case XML_ENTITY_REF_NODE: - goto ok; - default: + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + switch (reparentee->type) { + case XML_ELEMENT_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + /* + * The DOM specification says no to adding text-like nodes + * directly to a document, but we allow it for compatibility. 
+ */ + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + goto ok; + default: + break; + } break; - } - break; - case XML_DOCUMENT_FRAG_NODE: - case XML_ENTITY_REF_NODE: - case XML_ELEMENT_NODE: - switch (reparentee->type) { - case XML_ELEMENT_NODE: - case XML_PI_NODE: - case XML_COMMENT_NODE: - case XML_TEXT_NODE: - case XML_CDATA_SECTION_NODE: + case XML_DOCUMENT_FRAG_NODE: case XML_ENTITY_REF_NODE: - goto ok; - default: + case XML_ELEMENT_NODE: + switch (reparentee->type) { + case XML_ELEMENT_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + goto ok; + default: + break; + } + break; + case XML_ATTRIBUTE_NODE: + switch (reparentee->type) { + case XML_TEXT_NODE: + case XML_ENTITY_REF_NODE: + goto ok; + default: + break; + } break; - } - break; - case XML_ATTRIBUTE_NODE: - switch (reparentee->type) { case XML_TEXT_NODE: - case XML_ENTITY_REF_NODE: - goto ok; + /* + * xmlAddChild() breaks the DOM specification in that it allows + * adding a text node to another, in which case text nodes are + * coalesced, but since our JRuby version does not support such + * operation, we should inhibit it. + */ + break; default: break; - } - break; - case XML_TEXT_NODE: - /* - * xmlAddChild() breaks the DOM specification in that it allows - * adding a text node to another, in which case text nodes are - * coalesced, but since our JRuby version does not support such - * operation, we should inhibit it. 
- */ - break; - default: - break; } rb_raise(rb_eArgError, "cannot reparent %s there", rb_obj_classname(reparentee_obj)); } ok: - xmlUnlinkNode(reparentee); + original_reparentee = reparentee; if (reparentee->doc != pivot->doc || reparentee->type == XML_TEXT_NODE) { /* @@ -308,11 +353,13 @@ reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func * issue #391, where new node's prefix may become the string "default" * see libxml2 tree.c xmlNewReconciliedNs which implements this behavior. */ - xmlFree((xmlChar *)reparentee->ns->prefix); + xmlFree(DISCARD_CONST_QUAL_XMLCHAR(reparentee->ns->prefix)); reparentee->ns->prefix = NULL; } } + xmlUnlinkNode(original_reparentee); + if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling && reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) { /* @@ -352,49 +399,421 @@ reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func * adjacent text nodes. */ DATA_PTR(reparentee_obj) = reparented ; - - relink_namespace(reparented); - reparented_obj = noko_xml_node_wrap(Qnil, reparented); rb_funcall(reparented_obj, id_decorate_bang, 0); + /* if we've created a cycle, raise an exception */ + raise_if_ancestor_of_self(reparented); + + relink_namespace(reparented); + return reparented_obj ; } +// :startdoc: /* - * call-seq: - * document + * :call-seq: + * add_namespace_definition(prefix, href) → Nokogiri::XML::Namespace + * add_namespace(prefix, href) → Nokogiri::XML::Namespace + * + * :category: Manipulating Document Structure + * + * Adds a namespace definition to this node with +prefix+ using +href+ value, as if this node had + * included an attribute "xmlns:prefix=href". + * + * A default namespace definition for this node can be added by passing +nil+ for +prefix+. 
+ * + * [Parameters] + * - +prefix+ (String, +nil+) An {XML Name}[https://www.w3.org/TR/xml-names/#ns-decl] + * - +href+ (String) The {URI reference}[https://www.w3.org/TR/xml-names/#sec-namespaces] + * + * [Returns] The new Nokogiri::XML::Namespace + * + * *Example:* adding a non-default namespace definition + * + * doc = Nokogiri::XML("") + * inventory = doc.at_css("inventory") + * inventory.add_namespace_definition("automobile", "http://alices-autos.com/") + * inventory.add_namespace_definition("bicycle", "http://bobs-bikes.com/") + * inventory.add_child("Michelin model XGV, size 75R") + * doc.to_xml + * # => "\n" + + * # "\n" + + * # " \n" + + * # " Michelin model XGV, size 75R\n" + + * # " \n" + + * # "\n" + * + * *Example:* adding a default namespace definition + * + * doc = Nokogiri::XML("Michelin model XGV, size 75R") + * doc.at_css("tire").add_namespace_definition(nil, "http://bobs-bikes.com/") + * doc.to_xml + * # => "\n" + + * # "\n" + + * # " \n" + + * # " Michelin model XGV, size 75R\n" + + * # " \n" + + * # "\n" * - * Get the document for this Node */ static VALUE -document(VALUE self) +rb_xml_node_add_namespace_definition(VALUE rb_node, VALUE rb_prefix, VALUE rb_href) +{ + xmlNodePtr c_node, element; + xmlNsPtr c_namespace; + const xmlChar *c_prefix = (const xmlChar *)(NIL_P(rb_prefix) ? 
NULL : StringValueCStr(rb_prefix)); + + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); + element = c_node ; + + c_namespace = xmlSearchNs(c_node->doc, c_node, c_prefix); + + if (!c_namespace) { + if (c_node->type != XML_ELEMENT_NODE) { + element = c_node->parent; + } + c_namespace = xmlNewNs(element, (const xmlChar *)StringValueCStr(rb_href), c_prefix); + } + + if (!c_namespace) { + return Qnil ; + } + + if (NIL_P(rb_prefix) || c_node != element) { + xmlSetNs(c_node, c_namespace); + } + + return noko_xml_namespace_wrap(c_namespace, c_node->doc); +} + + +/* + * :call-seq: attribute(name) → Nokogiri::XML::Attr + * + * :category: Working With Node Attributes + * + * [Returns] Attribute (Nokogiri::XML::Attr) belonging to this node with name +name+. + * + * ⚠ Note that attribute namespaces are ignored and only the simple (non-namespace-prefixed) name is + * used to find a matching attribute. In case of a simple name collision, only one of the matching + * attributes will be returned. In this case, you will need to use #attribute_with_ns. + * + * *Example:* + * + * doc = Nokogiri::XML("") + * child = doc.at_css("child") + * child.attribute("size") # => # + * child.attribute("class") # => # + * + * *Example* showing that namespaced attributes will not be returned: + * + * ⚠ Note that only one of the two matching attributes is returned. + * + * doc = Nokogiri::XML(<<~EOF) + * + * + * + * EOF + * doc.at_css("child").attribute("size") + * # => #(Attr:0x550 { + * # name = "size", + * # namespace = #(Namespace:0x564 { + * # prefix = "width", + * # href = "http://example.com/widths" + * # }), + * # value = "broad" + * # }) + */ +static VALUE +rb_xml_node_attribute(VALUE self, VALUE name) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + xmlAttrPtr prop; + Noko_Node_Get_Struct(self, xmlNode, node); + prop = xmlHasProp(node, (xmlChar *)StringValueCStr(name)); + + if (! 
prop) { return Qnil; } + return noko_xml_node_wrap(Qnil, (xmlNodePtr)prop); +} + + +/* + * :call-seq: attribute_nodes() → Array + * + * :category: Working With Node Attributes + * + * [Returns] Attributes (an Array of Nokogiri::XML::Attr) belonging to this node. + * + * Note that this is the preferred alternative to #attributes when the simple + * (non-namespace-prefixed) attribute names may collide. + * + * *Example:* + * + * Contrast this with the colliding-name example from #attributes. + * + * doc = Nokogiri::XML(<<~EOF) + * + * + * + * EOF + * doc.at_css("child").attribute_nodes + * # => [#(Attr:0x550 { + * # name = "size", + * # namespace = #(Namespace:0x564 { + * # prefix = "width", + * # href = "http://example.com/widths" + * # }), + * # value = "broad" + * # }), + * # #(Attr:0x578 { + * # name = "size", + * # namespace = #(Namespace:0x58c { + * # prefix = "height", + * # href = "http://example.com/heights" + * # }), + * # value = "tall" + * # })] + */ +static VALUE +rb_xml_node_attribute_nodes(VALUE rb_node) +{ + xmlNodePtr c_node; + + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); + + return noko_xml_node_attrs(c_node); +} + + +/* + * :call-seq: attribute_with_ns(name, namespace) → Nokogiri::XML::Attr + * + * :category: Working With Node Attributes + * + * [Returns] + * Attribute (Nokogiri::XML::Attr) belonging to this node with matching +name+ and +namespace+. 
+ * + * [Parameters] + * - +name+ (String): the simple (non-namespace-prefixed) name of the attribute + * - +namespace+ (String): the URI of the attribute's namespace + * + * See related: #attribute + * + * *Example:* + * + * doc = Nokogiri::XML(<<~EOF) + * + * + * + * EOF + * doc.at_css("child").attribute_with_ns("size", "http://example.com/widths") + * # => #(Attr:0x550 { + * # name = "size", + * # namespace = #(Namespace:0x564 { + * # prefix = "width", + * # href = "http://example.com/widths" + * # }), + * # value = "broad" + * # }) + * doc.at_css("child").attribute_with_ns("size", "http://example.com/heights") + * # => #(Attr:0x578 { + * # name = "size", + * # namespace = #(Namespace:0x58c { + * # prefix = "height", + * # href = "http://example.com/heights" + * # }), + * # value = "tall" + * # }) + */ +static VALUE +rb_xml_node_attribute_with_ns(VALUE self, VALUE name, VALUE namespace) +{ + xmlNodePtr node; + xmlAttrPtr prop; + Noko_Node_Get_Struct(self, xmlNode, node); + prop = xmlHasNsProp(node, (xmlChar *)StringValueCStr(name), + NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace)); + + if (! prop) { return Qnil; } + return noko_xml_node_wrap(Qnil, (xmlNodePtr)prop); +} + + + +/* + * call-seq: blank? → Boolean + * + * [Returns] +true+ if the node is an empty or whitespace-only text or cdata node, else +false+. + * + * *Example:* + * + * Nokogiri("").root.child.blank? # => false + * Nokogiri("\t \n").root.child.blank? # => true + * Nokogiri("").root.child.blank? # => true + * Nokogiri("not-blank").root.child + * .tap { |n| n.content = "" }.blank # => true + */ +static VALUE +rb_xml_node_blank_eh(VALUE self) +{ + xmlNodePtr node; + Noko_Node_Get_Struct(self, xmlNode, node); + return (1 == xmlIsBlankNode(node)) ? 
Qtrue : Qfalse ; +} + + +/* + * :call-seq: child() → Nokogiri::XML::Node + * + * :category: Traversing Document Structure + * + * [Returns] First of this node's children, or +nil+ if there are no children + * + * This is a convenience method and is equivalent to: + * + * node.children.first + * + * See related: #children + */ +static VALUE +rb_xml_node_child(VALUE self) +{ + xmlNodePtr node, child; + Noko_Node_Get_Struct(self, xmlNode, node); + + child = node->children; + if (!child) { return Qnil; } + + return noko_xml_node_wrap(Qnil, child); +} + + +/* + * :call-seq: children() → Nokogiri::XML::NodeSet + * + * :category: Traversing Document Structure + * + * [Returns] Nokogiri::XML::NodeSet containing this node's children. + */ +static VALUE +rb_xml_node_children(VALUE self) +{ + xmlNodePtr node; + xmlNodePtr child; + xmlNodeSetPtr set; + VALUE document; + VALUE node_set; + + Noko_Node_Get_Struct(self, xmlNode, node); + + child = node->children; + set = xmlXPathNodeSetCreate(child); + + document = DOC_RUBY_OBJECT(node->doc); + + if (!child) { return noko_xml_node_set_wrap(set, document); } + + child = child->next; + while (NULL != child) { + xmlXPathNodeSetAddUnique(set, child); + child = child->next; + } + + node_set = noko_xml_node_set_wrap(set, document); + + return node_set; +} + + +/* + * :call-seq: + * content() → String + * inner_text() → String + * text() → String + * to_str() → String + * + * [Returns] + * Contents of all the text nodes in this node's subtree, concatenated together into a single + * String. + * + * ⚠ Note that entities will _always_ be expanded in the returned String. + * + * See related: #inner_html + * + * *Example* of how entities are handled: + * + * Note that < becomes < in the returned String. 
+ * + * doc = Nokogiri::XML.fragment("a < b") + * doc.at_css("child").content + * # => "a < b" + * + * *Example* of how a subtree is handled: + * + * Note that the tags are omitted and only the text node contents are returned, + * concatenated into a single string. + * + * doc = Nokogiri::XML.fragment("first second") + * doc.at_css("child").content + * # => "first second" + */ +static VALUE +rb_xml_node_content(VALUE self) +{ + xmlNodePtr node; + xmlChar *content; + + Noko_Node_Get_Struct(self, xmlNode, node); + + content = xmlNodeGetContent(node); + if (content) { + VALUE rval = NOKOGIRI_STR_NEW2(content); + xmlFree(content); + return rval; + } + return Qnil; +} + + +/* + * :call-seq: document() → Nokogiri::XML::Document + * + * :category: Traversing Document Structure + * + * [Returns] Parent Nokogiri::XML::Document for this node + */ +static VALUE +rb_xml_node_document(VALUE self) +{ + xmlNodePtr node; + Noko_Node_Get_Struct(self, xmlNode, node); return DOC_RUBY_OBJECT(node->doc); } /* - * call-seq: - * pointer_id + * :call-seq: pointer_id() → Integer * - * Get the internal pointer number + * [Returns] + * A unique id for this node based on the internal memory structures. This method is used by #== + * to determine node identity. 
*/ static VALUE -pointer_id(VALUE self) +rb_xml_node_pointer_id(VALUE self) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); return INT2NUM((long)(node)); } /* - * call-seq: - * encode_special_chars(string) + * :call-seq: encode_special_chars(string) → String * * Encode any special characters in +string+ */ @@ -405,7 +824,7 @@ encode_special_chars(VALUE self, VALUE string) xmlChar *encoded; VALUE encoded_str; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); encoded = xmlEncodeSpecialChars( node->doc, (const xmlChar *)StringValueCStr(string) @@ -418,8 +837,8 @@ encode_special_chars(VALUE self, VALUE string) } /* - * call-seq: - * create_internal_subset(name, external_id, system_id) + * :call-seq: + * create_internal_subset(name, external_id, system_id) * * Create the internal subset of a document. * @@ -436,7 +855,7 @@ create_internal_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_i xmlDocPtr doc; xmlDtdPtr dtd; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); doc = node->doc; @@ -457,8 +876,8 @@ create_internal_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_i } /* - * call-seq: - * create_external_subset(name, external_id, system_id) + * :call-seq: + * create_external_subset(name, external_id, system_id) * * Create an external subset */ @@ -469,7 +888,7 @@ create_external_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_i xmlDocPtr doc; xmlDtdPtr dtd; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); doc = node->doc; @@ -490,8 +909,8 @@ create_external_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_i } /* - * call-seq: - * external_subset + * :call-seq: + * external_subset() * * Get the external subset */ @@ -502,7 +921,7 @@ external_subset(VALUE self) xmlDocPtr doc; xmlDtdPtr dtd; - Data_Get_Struct(self, xmlNode, node); + 
Noko_Node_Get_Struct(self, xmlNode, node); if (!node->doc) { return Qnil; } @@ -515,8 +934,8 @@ external_subset(VALUE self) } /* - * call-seq: - * internal_subset + * :call-seq: + * internal_subset() * * Get the internal subset */ @@ -527,7 +946,7 @@ internal_subset(VALUE self) xmlDocPtr doc; xmlDtdPtr dtd; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); if (!node->doc) { return Qnil; } @@ -540,16 +959,19 @@ internal_subset(VALUE self) } /* - * call-seq: - * dup - * dup(depth) - * dup(depth, new_parent_doc) + * :call-seq: + * dup → Nokogiri::XML::Node + * dup(depth) → Nokogiri::XML::Node + * dup(depth, new_parent_doc) → Nokogiri::XML::Node * * Copy this node. - * An optional depth may be passed in. 0 is a shallow copy, 1 (the default) is a deep copy. - * An optional new_parent_doc may also be passed in, which will be the new - * node's parent document. Defaults to the current node's document. - * current document. + * + * [Parameters] + * - +depth+ 0 is a shallow copy, 1 (the default) is a deep copy. + * - +new_parent_doc+ + * The new node's parent Document. Defaults to the this node's document. + * + * [Returns] The new Nokgiri::XML::Node */ static VALUE duplicate_node(int argc, VALUE *argv, VALUE self) @@ -560,7 +982,7 @@ duplicate_node(int argc, VALUE *argv, VALUE self) xmlDocPtr new_parent_doc; xmlNodePtr node, dup; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); n_args = rb_scan_args(argc, argv, "02", &r_level, &r_new_parent_doc); @@ -584,35 +1006,22 @@ duplicate_node(int argc, VALUE *argv, VALUE self) } /* - * call-seq: - * unlink - * - * Unlink this node from its current context. - */ -static VALUE -unlink_node(VALUE self) -{ - xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); - xmlUnlinkNode(node); - noko_xml_document_pin_node(node); - return self; -} - -/* - * call-seq: - * blank? + * :call-seq: + * unlink() → self * - * Is this node blank? 
+ * Unlink this node from its current context. */ static VALUE -blank_eh(VALUE self) +unlink_node(VALUE self) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); - return (1 == xmlIsBlankNode(node)) ? Qtrue : Qfalse ; + Noko_Node_Get_Struct(self, xmlNode, node); + xmlUnlinkNode(node); + noko_xml_document_pin_node(node); + return self; } + /* * call-seq: * next_sibling @@ -623,7 +1032,7 @@ static VALUE next_sibling(VALUE self) { xmlNodePtr node, sibling; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); sibling = node->next; if (!sibling) { return Qnil; } @@ -641,7 +1050,7 @@ static VALUE previous_sibling(VALUE self) { xmlNodePtr node, sibling; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); sibling = node->prev; if (!sibling) { return Qnil; } @@ -659,7 +1068,7 @@ static VALUE next_element(VALUE self) { xmlNodePtr node, sibling; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); sibling = xmlNextElementSibling(node); if (!sibling) { return Qnil; } @@ -677,7 +1086,7 @@ static VALUE previous_element(VALUE self) { xmlNodePtr node, sibling; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); /* * note that we don't use xmlPreviousElementSibling here because it's buggy pre-2.7.7. 
@@ -699,60 +1108,34 @@ replace(VALUE self, VALUE new_node) VALUE reparent = reparent_node_with(self, new_node, xmlReplaceNodeWrapper); xmlNodePtr pivot; - Data_Get_Struct(self, xmlNode, pivot); + Noko_Node_Get_Struct(self, xmlNode, pivot); noko_xml_document_pin_node(pivot); return reparent; } /* - * call-seq: - * children + * :call-seq: + * element_children() → NodeSet + * elements() → NodeSet * - * Get the list of children for this node as a NodeSet - */ -static VALUE -children(VALUE self) -{ - xmlNodePtr node; - xmlNodePtr child; - xmlNodeSetPtr set; - VALUE document; - VALUE node_set; - - Data_Get_Struct(self, xmlNode, node); - - child = node->children; - set = xmlXPathNodeSetCreate(child); - - document = DOC_RUBY_OBJECT(node->doc); - - if (!child) { return noko_xml_node_set_wrap(set, document); } - - child = child->next; - while (NULL != child) { - xmlXPathNodeSetAddUnique(set, child); - child = child->next; - } - - node_set = noko_xml_node_set_wrap(set, document); - - return node_set; -} - -/* - * call-seq: - * element_children + * [Returns] + * The node's child elements as a NodeSet. Only children that are elements will be returned, which + * notably excludes Text nodes. * - * Get the list of children for this node as a NodeSet. All nodes will be - * element nodes. + * *Example:* * - * Example: + * Note that #children returns the Text node "hello" while #element_children does not. * - * @doc.root.element_children.all? { |x| x.element? } # => true + * div = Nokogiri::HTML5("
helloworld").at_css("div") + * div.element_children + * # => [#]>] + * div.children + * # => [#, + * # #]>] */ static VALUE -element_children(VALUE self) +rb_xml_node_element_children(VALUE self) { xmlNodePtr node; xmlNodePtr child; @@ -760,7 +1143,7 @@ element_children(VALUE self) VALUE document; VALUE node_set; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); child = xmlFirstElementChild(node); set = xmlXPathNodeSetCreate(child); @@ -781,38 +1164,25 @@ element_children(VALUE self) } /* - * call-seq: - * child + * :call-seq: + * first_element_child() → Node * - * Returns the child node - */ -static VALUE -child(VALUE self) -{ - xmlNodePtr node, child; - Data_Get_Struct(self, xmlNode, node); - - child = node->children; - if (!child) { return Qnil; } - - return noko_xml_node_wrap(Qnil, child); -} - -/* - * call-seq: - * first_element_child + * [Returns] The first child Node that is an element. * - * Returns the first child node of this node that is an element. + * *Example:* * - * Example: + * Note that the "hello" child, which is a Text node, is skipped and the element is + * returned. * - * @doc.root.first_element_child.element? # => true + * div = Nokogiri::HTML5("
helloworld").at_css("div") + * div.first_element_child + * # => #(Element:0x3c { name = "span", children = [ #(Text "world")] }) */ static VALUE -first_element_child(VALUE self) +rb_xml_node_first_element_child(VALUE self) { xmlNodePtr node, child; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); child = xmlFirstElementChild(node); if (!child) { return Qnil; } @@ -821,20 +1191,25 @@ first_element_child(VALUE self) } /* - * call-seq: - * last_element_child + * :call-seq: + * last_element_child() → Node * - * Returns the last child node of this node that is an element. + * [Returns] The last child Node that is an element. * - * Example: + * *Example:* * - * @doc.root.last_element_child.element? # => true + * Note that the "hello" child, which is a Text node, is skipped and the yes + * element is returned. + * + * div = Nokogiri::HTML5("
noyesskip
").at_css("div") + * div.last_element_child + * # => #(Element:0x3c { name = "span", children = [ #(Text "yes")] }) */ static VALUE -last_element_child(VALUE self) +rb_xml_node_last_element_child(VALUE self) { xmlNodePtr node, child; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); child = xmlLastElementChild(node); if (!child) { return Qnil; } @@ -852,7 +1227,7 @@ static VALUE key_eh(VALUE self, VALUE attribute) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); if (xmlHasProp(node, (xmlChar *)StringValueCStr(attribute))) { return Qtrue; } @@ -869,7 +1244,7 @@ static VALUE namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); if (xmlHasNsProp(node, (xmlChar *)StringValueCStr(attribute), NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace))) { return Qtrue; @@ -888,7 +1263,7 @@ set(VALUE self, VALUE property, VALUE value) { xmlNodePtr node, cur; xmlAttrPtr prop; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); /* If a matching attribute node already exists, then xmlSetProp will destroy * the existing node's children. 
However, if Nokogiri has a node object @@ -933,10 +1308,10 @@ get(VALUE self, VALUE rattribute) if (NIL_P(rattribute)) { return Qnil; } - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); attribute = xmlCharStrdup(StringValueCStr(rattribute)); - colon = (xmlChar *)(uintptr_t)xmlStrchr(attribute, (const xmlChar)':'); + colon = DISCARD_CONST_QUAL_XMLCHAR(xmlStrchr(attribute, (const xmlChar)':')); if (colon) { /* split the attribute string into separate prefix and name by * null-terminating the prefix at the colon */ @@ -975,10 +1350,10 @@ set_namespace(VALUE self, VALUE namespace) xmlNodePtr node; xmlNsPtr ns = NULL; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); if (!NIL_P(namespace)) { - Data_Get_Struct(namespace, xmlNs, ns); + Noko_Namespace_Get_Struct(namespace, xmlNs, ns); } xmlSetNs(node, ns); @@ -987,70 +1362,32 @@ set_namespace(VALUE self, VALUE namespace) } /* - * call-seq: - * attribute(name) + * :call-seq: + * namespace() → Namespace * - * Get the attribute node with +name+ - */ -static VALUE -attr(VALUE self, VALUE name) -{ - xmlNodePtr node; - xmlAttrPtr prop; - Data_Get_Struct(self, xmlNode, node); - prop = xmlHasProp(node, (xmlChar *)StringValueCStr(name)); - - if (! prop) { return Qnil; } - return noko_xml_node_wrap(Qnil, (xmlNodePtr)prop); -} - -/* - * call-seq: - * attribute_with_ns(name, namespace) + * [Returns] The Namespace of the element or attribute node, or +nil+ if there is no namespace. * - * Get the attribute node with +name+ and +namespace+ - */ -static VALUE -attribute_with_ns(VALUE self, VALUE name, VALUE namespace) -{ - xmlNodePtr node; - xmlAttrPtr prop; - Data_Get_Struct(self, xmlNode, node); - prop = xmlHasNsProp(node, (xmlChar *)StringValueCStr(name), - NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace)); - - if (! 
prop) { return Qnil; } - return noko_xml_node_wrap(Qnil, (xmlNodePtr)prop); -} - -/* - * @overload attribute_nodes() - * Get the attributes for a Node - * @return [Array] containing the Node's attributes. - */ -static VALUE -attribute_nodes(VALUE rb_node) -{ - xmlNodePtr c_node; - - Data_Get_Struct(rb_node, xmlNode, c_node); - - return noko_xml_node_attrs(c_node); -} - - -/* - * call-seq: - * namespace() + * *Example:* * - * returns the namespace of the element or attribute node as a Namespace - * object, or nil if there is no namespace for the element or attribute. + * doc = Nokogiri::XML(<<~EOF) + * + * + * + * + * + * EOF + * doc.at_xpath("//first").namespace + * # => nil + * doc.at_xpath("//xmlns:second", "xmlns" => "http://example.com/child").namespace + * # => #(Namespace:0x3c { href = "http://example.com/child" }) + * doc.at_xpath("//foo:third", "foo" => "http://example.com/foo").namespace + * # => #(Namespace:0x50 { prefix = "foo", href = "http://example.com/foo" }) */ static VALUE -noko_xml_node_namespace(VALUE rb_node) +rb_xml_node_namespace(VALUE rb_node) { xmlNodePtr c_node ; - Data_Get_Struct(rb_node, xmlNode, c_node); + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); if (c_node->ns) { return noko_xml_namespace_wrap(c_node->ns, c_node->doc); @@ -1060,10 +1397,32 @@ noko_xml_node_namespace(VALUE rb_node) } /* - * call-seq: - * namespace_definitions() + * :call-seq: + * namespace_definitions() → Array + * + * [Returns] + * Namespaces that are defined directly on this node, as an Array of Namespace objects. The array + * will be empty if no namespaces are defined on this node. * - * returns namespaces defined on self element directly, as an array of Namespace objects. Includes both a default namespace (as in"xmlns="), and prefixed namespaces (as in "xmlns:prefix="). 
+ * *Example:* + * + * doc = Nokogiri::XML(<<~EOF) + * + * + * + * + * + * EOF + * doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_definitions + * # => [] + * doc.at_xpath("//xmlns:second", "xmlns" => "http://example.com/child").namespace_definitions + * # => [#(Namespace:0x3c { href = "http://example.com/child" }), + * # #(Namespace:0x50 { + * # prefix = "unused", + * # href = "http://example.com/unused" + * # })] + * doc.at_xpath("//foo:third", "foo" => "http://example.com/foo").namespace_definitions + * # => [#(Namespace:0x64 { prefix = "foo", href = "http://example.com/foo" })] */ static VALUE namespace_definitions(VALUE rb_node) @@ -1073,7 +1432,7 @@ namespace_definitions(VALUE rb_node) xmlNsPtr c_namespace; VALUE definitions = rb_ary_new(); - Data_Get_Struct(rb_node, xmlNode, c_node); + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); c_namespace = c_node->nsDef; if (!c_namespace) { @@ -1089,23 +1448,42 @@ namespace_definitions(VALUE rb_node) } /* - * call-seq: - * namespace_scopes() + * :call-seq: + * namespace_scopes() → Array + * + * [Returns] Array of all the Namespaces on this node and its ancestors. * - * returns namespaces in scope for self -- those defined on self element - * directly or any ancestor node -- as an array of Namespace objects. Default - * namespaces ("xmlns=" style) for self are included in this array; Default - * namespaces for ancestors, however, are not. 
See also #namespaces + * See also #namespaces + * + * *Example:* + * + * doc = Nokogiri::XML(<<~EOF) + * + * + * + * + * + * EOF + * doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_scopes + * # => [#(Namespace:0x3c { href = "http://example.com/root" }), + * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })] + * doc.at_xpath("//child:second", "child" => "http://example.com/child").namespace_scopes + * # => [#(Namespace:0x64 { href = "http://example.com/child" }), + * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })] + * doc.at_xpath("//root:third", "root" => "http://example.com/root").namespace_scopes + * # => [#(Namespace:0x78 { prefix = "foo", href = "http://example.com/foo" }), + * # #(Namespace:0x3c { href = "http://example.com/root" }), + * # #(Namespace:0x50 { prefix = "bar", href = "http://example.com/bar" })] */ static VALUE -namespace_scopes(VALUE rb_node) +rb_xml_node_namespace_scopes(VALUE rb_node) { xmlNodePtr c_node ; xmlNsPtr *namespaces; VALUE scopes = rb_ary_new(); int j; - Data_Get_Struct(rb_node, xmlNode, c_node); + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); namespaces = xmlGetNsList(c_node->doc, c_node); if (!namespaces) { @@ -1130,7 +1508,7 @@ static VALUE node_type(VALUE self) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); return INT2NUM((long)node->type); } @@ -1144,7 +1522,7 @@ static VALUE set_native_content(VALUE self, VALUE content) { xmlNodePtr node, child, next ; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); child = node->children; while (NULL != child) { @@ -1158,30 +1536,6 @@ set_native_content(VALUE self, VALUE content) return content; } -/* - * call-seq: - * content - * - * Returns the plaintext content for this Node. Note that entities will always - * be expanded in the returned string. 
- */ -static VALUE -get_native_content(VALUE self) -{ - xmlNodePtr node; - xmlChar *content; - - Data_Get_Struct(self, xmlNode, node); - - content = xmlNodeGetContent(node); - if (content) { - VALUE rval = NOKOGIRI_STR_NEW2(content); - xmlFree(content); - return rval; - } - return Qnil; -} - /* * call-seq: * lang= @@ -1194,7 +1548,7 @@ set_lang(VALUE self_rb, VALUE lang_rb) xmlNodePtr self ; xmlChar *lang ; - Data_Get_Struct(self_rb, xmlNode, self); + Noko_Node_Get_Struct(self_rb, xmlNode, self); lang = (xmlChar *)StringValueCStr(lang_rb); xmlNodeSetLang(self, lang); @@ -1216,7 +1570,7 @@ get_lang(VALUE self_rb) xmlChar *lang ; VALUE lang_rb ; - Data_Get_Struct(self_rb, xmlNode, self); + Noko_Node_Get_Struct(self_rb, xmlNode, self); lang = xmlNodeGetLang(self); if (lang) { @@ -1245,7 +1599,7 @@ static VALUE get_parent(VALUE self) { xmlNodePtr node, parent; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); parent = node->parent; if (!parent) { return Qnil; } @@ -1263,7 +1617,7 @@ static VALUE set_name(VALUE self, VALUE new_name) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); xmlNodeSetName(node, (xmlChar *)StringValueCStr(new_name)); return new_name; } @@ -1278,7 +1632,7 @@ static VALUE get_name(VALUE self) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); if (node->name) { return NOKOGIRI_STR_NEW2(node->name); } @@ -1292,17 +1646,25 @@ get_name(VALUE self) * Returns the path associated with this Node */ static VALUE -path(VALUE self) +rb_xml_node_path(VALUE rb_node) { - xmlNodePtr node; - xmlChar *path ; + xmlNodePtr c_node; + xmlChar *c_path ; VALUE rval; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); + + c_path = xmlGetNodePath(c_node); + if (c_path == NULL) { + // see https://github.com/sparklemotion/nokogiri/issues/2250 + // this behavior is clearly undesirable, but is what 
libxml <= 2.9.10 returned, and so we + // do this for now to preserve the behavior across libxml2 versions. + rval = NOKOGIRI_STR_NEW2("?"); + } else { + rval = NOKOGIRI_STR_NEW2(c_path); + xmlFree(c_path); + } - path = xmlGetNodePath(node); - rval = NOKOGIRI_STR_NEW2(path); - xmlFree(path); return rval ; } @@ -1339,7 +1701,7 @@ native_write_to( const char *before_indent; xmlSaveCtxtPtr savectx; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); xmlIndentTreeOutput = 1; @@ -1363,18 +1725,39 @@ native_write_to( } /* - * call-seq: - * line + * :call-seq: + * line() → Integer + * + * [Returns] The line number of this Node. + * + * --- + * + * ⚠ The CRuby and JRuby implementations differ in important ways! + * + * Semantic differences: + * - The CRuby method reflects the node's line number in the parsed string + * - The JRuby method reflects the node's line number in the final DOM structure after + * corrections have been applied + * + * Performance differences: + * - The CRuby method is {O(1)}[https://en.wikipedia.org/wiki/Time_complexity#Constant_time] + * (constant time) + * - The JRuby method is {O(n)}[https://en.wikipedia.org/wiki/Time_complexity#Linear_time] (linear + * time, where n is the number of nodes before/above the element in the DOM) * - * Returns the line for this Node + * If you'd like to help improve the JRuby implementation, please review these issues and reach out + * to the maintainers: + * - https://github.com/sparklemotion/nokogiri/issues/1223 + * - https://github.com/sparklemotion/nokogiri/pull/2177 + * - https://github.com/sparklemotion/nokogiri/issues/2380 */ static VALUE -line(VALUE self) +rb_xml_node_line(VALUE rb_node) { - xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + xmlNodePtr c_node; + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); - return INT2NUM(xmlGetLineNo(node)); + return INT2NUM(xmlGetLineNo(c_node)); } /* @@ -1384,90 +1767,56 @@ line(VALUE self) * Sets the line for this Node. 
num must be less than 65535. */ static VALUE -set_line(VALUE self, VALUE num) -{ - xmlNodePtr node; - int value = NUM2INT(num); - - Data_Get_Struct(self, xmlNode, node); - if (value < 65535) { - node->line = value; - } - - return num; -} - -/* - * call-seq: - * add_namespace_definition(prefix, href) - * - * Adds a namespace definition with +prefix+ using +href+ value. The result is - * as if parsed XML for this node had included an attribute - * 'xmlns:prefix=value'. A default namespace for this node ("xmlns=") can be - * added by passing 'nil' for prefix. Namespaces added this way will not - * show up in #attributes, but they will be included as an xmlns attribute - * when the node is serialized to XML. - */ -static VALUE -add_namespace_definition(VALUE rb_node, VALUE rb_prefix, VALUE rb_href) +rb_xml_node_line_set(VALUE rb_node, VALUE rb_line_number) { - xmlNodePtr c_node, element; - xmlNsPtr c_namespace; - const xmlChar *c_prefix = (const xmlChar *)(NIL_P(rb_prefix) ? NULL : StringValueCStr(rb_prefix)); - - Data_Get_Struct(rb_node, xmlNode, c_node); - element = c_node ; + xmlNodePtr c_node; + int line_number = NUM2INT(rb_line_number); - c_namespace = xmlSearchNs(c_node->doc, c_node, c_prefix); + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); - if (!c_namespace) { - if (c_node->type != XML_ELEMENT_NODE) { - element = c_node->parent; + // libxml2 optionally uses xmlNode.psvi to store longer line numbers, but only for text nodes. + // search for "psvi" in SAX2.c and tree.c to learn more. 
+ if (line_number < 65535) { + c_node->line = (short) line_number; + } else { + c_node->line = 65535; + if (c_node->type == XML_TEXT_NODE) { + c_node->psvi = (void *)(ptrdiff_t) line_number; } - c_namespace = xmlNewNs(element, (const xmlChar *)StringValueCStr(rb_href), c_prefix); - } - - if (!c_namespace) { - return Qnil ; - } - - if (NIL_P(rb_prefix) || c_node != element) { - xmlSetNs(c_node, c_namespace); } - return noko_xml_namespace_wrap(c_namespace, c_node->doc); + return rb_line_number; } -/* - * @overload new(name, document) - * Create a new node with +name+ sharing GC lifecycle with +document+. - * @param name [String] - * @param document [Nokogiri::XML::Document] - * @yieldparam node [Nokogiri::XML::Node] - * @return [Nokogiri::XML::Node] - * @see Nokogiri::XML::Node#initialize - */ +/* :nodoc: documented in lib/nokogiri/xml/node.rb */ static VALUE rb_xml_node_new(int argc, VALUE *argv, VALUE klass) { - xmlDocPtr doc; - xmlNodePtr node; - VALUE name; - VALUE document; + xmlNodePtr c_document_node; + xmlNodePtr c_node; + VALUE rb_name; + VALUE rb_document_node; VALUE rest; VALUE rb_node; - rb_scan_args(argc, argv, "2*", &name, &document, &rest); + rb_scan_args(argc, argv, "2*", &rb_name, &rb_document_node, &rest); - Data_Get_Struct(document, xmlDoc, doc); + if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlNode)) { + rb_raise(rb_eArgError, "document must be a Nokogiri::XML::Node"); + } + if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) { + // TODO: deprecate allowing Node + NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. 
This will become an error in a future release of Nokogiri."); + } + Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node); - node = xmlNewNode(NULL, (xmlChar *)StringValueCStr(name)); - node->doc = doc->doc; - noko_xml_document_pin_node(node); + c_node = xmlNewNode(NULL, (xmlChar *)StringValueCStr(rb_name)); + c_node->doc = c_document_node->doc; + noko_xml_document_pin_node(c_node); rb_node = noko_xml_node_wrap( klass == cNokogiriXmlNode ? (VALUE)NULL : klass, - node + c_node ); rb_obj_call_init(rb_node, argc, argv); @@ -1489,7 +1838,7 @@ dump_html(VALUE self) xmlNodePtr node ; VALUE html; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); buf = xmlBufferCreate() ; htmlNodeDump(buf, node->doc, node); @@ -1508,8 +1857,8 @@ static VALUE compare(VALUE self, VALUE _other) { xmlNodePtr node, other; - Data_Get_Struct(self, xmlNode, node); - Data_Get_Struct(_other, xmlNode, other); + Noko_Node_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(_other, xmlNode, other); return INT2NUM((long)xmlXPathCmpNodes(other, node)); } @@ -1529,7 +1878,7 @@ process_xincludes(VALUE self, VALUE options) xmlNodePtr node; VALUE error_list = rb_ary_new(); - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher); rcode = xmlXIncludeProcessTreeFlags(node, (int)NUM2INT(options)); @@ -1560,7 +1909,7 @@ in_context(VALUE self, VALUE _str, VALUE _options) VALUE doc, err; int doc_is_empty; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); doc = DOC_RUBY_OBJECT(node->doc); err = rb_iv_get(doc, "@errors"); @@ -1601,9 +1950,7 @@ in_context(VALUE self, VALUE _str, VALUE _options) */ child_iter = node->doc->children ; while (child_iter) { - if (child_iter->parent != (xmlNodePtr)node->doc) { - child_iter->parent = (xmlNodePtr)node->doc; - } + child_iter->parent = (xmlNodePtr)node->doc; child_iter = child_iter->next; } 
@@ -1633,12 +1980,12 @@ in_context(VALUE self, VALUE _str, VALUE _options) /* FIXME: This probably needs to handle more constants... */ switch (error) { - case XML_ERR_INTERNAL_ERROR: - case XML_ERR_NO_MEMORY: - rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error); - break; - default: - break; + case XML_ERR_INTERNAL_ERROR: + case XML_ERR_NO_MEMORY: + rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error); + break; + default: + break; } set = xmlXPathNodeSetCreate(NULL); @@ -1654,14 +2001,12 @@ in_context(VALUE self, VALUE _str, VALUE _options) return noko_xml_node_set_wrap(set, doc); } - VALUE noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node) { VALUE rb_document, rb_node_cache, rb_node; nokogiriTuplePtr node_has_a_document; xmlDocPtr c_doc; - void (*mark_method)(xmlNodePtr) = NULL ; assert(c_node); @@ -1669,11 +2014,9 @@ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node) return DOC_RUBY_OBJECT(c_node->doc); } - /* It's OK if the node doesn't have a fully-realized document (as in XML::Reader). 
*/ - /* see https://github.com/sparklemotion/nokogiri/issues/95 */ - /* and https://github.com/sparklemotion/nokogiri/issues/439 */ c_doc = c_node->doc; - if (c_doc->type == XML_DOCUMENT_FRAG_NODE) { c_doc = c_doc->doc; } + + // Nodes yielded from XML::Reader don't have a fully-realized Document node_has_a_document = DOC_RUBY_OBJECT_TEST(c_doc); if (c_node->_private && node_has_a_document) { @@ -1682,50 +2025,48 @@ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node) if (!RTEST(rb_class)) { switch (c_node->type) { - case XML_ELEMENT_NODE: - rb_class = cNokogiriXmlElement; - break; - case XML_TEXT_NODE: - rb_class = cNokogiriXmlText; - break; - case XML_ATTRIBUTE_NODE: - rb_class = cNokogiriXmlAttr; - break; - case XML_ENTITY_REF_NODE: - rb_class = cNokogiriXmlEntityReference; - break; - case XML_COMMENT_NODE: - rb_class = cNokogiriXmlComment; - break; - case XML_DOCUMENT_FRAG_NODE: - rb_class = cNokogiriXmlDocumentFragment; - break; - case XML_PI_NODE: - rb_class = cNokogiriXmlProcessingInstruction; - break; - case XML_ENTITY_DECL: - rb_class = cNokogiriXmlEntityDecl; - break; - case XML_CDATA_SECTION_NODE: - rb_class = cNokogiriXmlCData; - break; - case XML_DTD_NODE: - rb_class = cNokogiriXmlDtd; - break; - case XML_ATTRIBUTE_DECL: - rb_class = cNokogiriXmlAttributeDecl; - break; - case XML_ELEMENT_DECL: - rb_class = cNokogiriXmlElementDecl; - break; - default: - rb_class = cNokogiriXmlNode; + case XML_ELEMENT_NODE: + rb_class = cNokogiriXmlElement; + break; + case XML_TEXT_NODE: + rb_class = cNokogiriXmlText; + break; + case XML_ATTRIBUTE_NODE: + rb_class = cNokogiriXmlAttr; + break; + case XML_ENTITY_REF_NODE: + rb_class = cNokogiriXmlEntityReference; + break; + case XML_COMMENT_NODE: + rb_class = cNokogiriXmlComment; + break; + case XML_DOCUMENT_FRAG_NODE: + rb_class = cNokogiriXmlDocumentFragment; + break; + case XML_PI_NODE: + rb_class = cNokogiriXmlProcessingInstruction; + break; + case XML_ENTITY_DECL: + rb_class = cNokogiriXmlEntityDecl; + break; + case 
XML_CDATA_SECTION_NODE: + rb_class = cNokogiriXmlCData; + break; + case XML_DTD_NODE: + rb_class = cNokogiriXmlDtd; + break; + case XML_ATTRIBUTE_DECL: + rb_class = cNokogiriXmlAttributeDecl; + break; + case XML_ELEMENT_DECL: + rb_class = cNokogiriXmlElementDecl; + break; + default: + rb_class = cNokogiriXmlNode; } } - mark_method = node_has_a_document ? mark : NULL ; - - rb_node = Data_Wrap_Struct(rb_class, mark_method, debug_node_dealloc, c_node) ; + rb_node = TypedData_Wrap_Struct(rb_class, &nokogiri_node_type, c_node) ; c_node->_private = (void *)rb_node; if (node_has_a_document) { @@ -1762,63 +2103,63 @@ noko_init_xml_node() { cNokogiriXmlNode = rb_define_class_under(mNokogiriXml, "Node", rb_cObject); + rb_undef_alloc_func(cNokogiriXmlNode); + rb_define_singleton_method(cNokogiriXmlNode, "new", rb_xml_node_new, -1); - rb_define_method(cNokogiriXmlNode, "add_namespace_definition", add_namespace_definition, 2); + rb_define_method(cNokogiriXmlNode, "add_namespace_definition", rb_xml_node_add_namespace_definition, 2); + rb_define_method(cNokogiriXmlNode, "attribute", rb_xml_node_attribute, 1); + rb_define_method(cNokogiriXmlNode, "attribute_nodes", rb_xml_node_attribute_nodes, 0); + rb_define_method(cNokogiriXmlNode, "attribute_with_ns", rb_xml_node_attribute_with_ns, 2); + rb_define_method(cNokogiriXmlNode, "blank?", rb_xml_node_blank_eh, 0); + rb_define_method(cNokogiriXmlNode, "child", rb_xml_node_child, 0); + rb_define_method(cNokogiriXmlNode, "children", rb_xml_node_children, 0); + rb_define_method(cNokogiriXmlNode, "content", rb_xml_node_content, 0); + rb_define_method(cNokogiriXmlNode, "create_external_subset", create_external_subset, 3); + rb_define_method(cNokogiriXmlNode, "create_internal_subset", create_internal_subset, 3); + rb_define_method(cNokogiriXmlNode, "document", rb_xml_node_document, 0); + rb_define_method(cNokogiriXmlNode, "dup", duplicate_node, -1); + rb_define_method(cNokogiriXmlNode, "element_children", rb_xml_node_element_children, 0); + 
rb_define_method(cNokogiriXmlNode, "encode_special_chars", encode_special_chars, 1); + rb_define_method(cNokogiriXmlNode, "external_subset", external_subset, 0); + rb_define_method(cNokogiriXmlNode, "first_element_child", rb_xml_node_first_element_child, 0); + rb_define_method(cNokogiriXmlNode, "internal_subset", internal_subset, 0); + rb_define_method(cNokogiriXmlNode, "key?", key_eh, 1); + rb_define_method(cNokogiriXmlNode, "lang", get_lang, 0); + rb_define_method(cNokogiriXmlNode, "lang=", set_lang, 1); + rb_define_method(cNokogiriXmlNode, "last_element_child", rb_xml_node_last_element_child, 0); + rb_define_method(cNokogiriXmlNode, "line", rb_xml_node_line, 0); + rb_define_method(cNokogiriXmlNode, "line=", rb_xml_node_line_set, 1); + rb_define_method(cNokogiriXmlNode, "namespace", rb_xml_node_namespace, 0); + rb_define_method(cNokogiriXmlNode, "namespace_definitions", namespace_definitions, 0); + rb_define_method(cNokogiriXmlNode, "namespace_scopes", rb_xml_node_namespace_scopes, 0); + rb_define_method(cNokogiriXmlNode, "namespaced_key?", namespaced_key_eh, 2); + rb_define_method(cNokogiriXmlNode, "native_content=", set_native_content, 1); + rb_define_method(cNokogiriXmlNode, "next_element", next_element, 0); + rb_define_method(cNokogiriXmlNode, "next_sibling", next_sibling, 0); rb_define_method(cNokogiriXmlNode, "node_name", get_name, 0); - rb_define_method(cNokogiriXmlNode, "document", document, 0); rb_define_method(cNokogiriXmlNode, "node_name=", set_name, 1); + rb_define_method(cNokogiriXmlNode, "node_type", node_type, 0); rb_define_method(cNokogiriXmlNode, "parent", get_parent, 0); - rb_define_method(cNokogiriXmlNode, "child", child, 0); - rb_define_method(cNokogiriXmlNode, "first_element_child", first_element_child, 0); - rb_define_method(cNokogiriXmlNode, "last_element_child", last_element_child, 0); - rb_define_method(cNokogiriXmlNode, "children", children, 0); - rb_define_method(cNokogiriXmlNode, "element_children", element_children, 0); - 
rb_define_method(cNokogiriXmlNode, "next_sibling", next_sibling, 0); - rb_define_method(cNokogiriXmlNode, "previous_sibling", previous_sibling, 0); - rb_define_method(cNokogiriXmlNode, "next_element", next_element, 0); + rb_define_method(cNokogiriXmlNode, "path", rb_xml_node_path, 0); + rb_define_method(cNokogiriXmlNode, "pointer_id", rb_xml_node_pointer_id, 0); rb_define_method(cNokogiriXmlNode, "previous_element", previous_element, 0); - rb_define_method(cNokogiriXmlNode, "node_type", node_type, 0); - rb_define_method(cNokogiriXmlNode, "path", path, 0); - rb_define_method(cNokogiriXmlNode, "key?", key_eh, 1); - rb_define_method(cNokogiriXmlNode, "namespaced_key?", namespaced_key_eh, 2); - rb_define_method(cNokogiriXmlNode, "blank?", blank_eh, 0); - rb_define_method(cNokogiriXmlNode, "attribute_nodes", attribute_nodes, 0); - rb_define_method(cNokogiriXmlNode, "attribute", attr, 1); - rb_define_method(cNokogiriXmlNode, "attribute_with_ns", attribute_with_ns, 2); - rb_define_method(cNokogiriXmlNode, "namespace", noko_xml_node_namespace, 0); - rb_define_method(cNokogiriXmlNode, "namespace_definitions", namespace_definitions, 0); - rb_define_method(cNokogiriXmlNode, "namespace_scopes", namespace_scopes, 0); - rb_define_method(cNokogiriXmlNode, "encode_special_chars", encode_special_chars, 1); - rb_define_method(cNokogiriXmlNode, "dup", duplicate_node, -1); + rb_define_method(cNokogiriXmlNode, "previous_sibling", previous_sibling, 0); rb_define_method(cNokogiriXmlNode, "unlink", unlink_node, 0); - rb_define_method(cNokogiriXmlNode, "internal_subset", internal_subset, 0); - rb_define_method(cNokogiriXmlNode, "external_subset", external_subset, 0); - rb_define_method(cNokogiriXmlNode, "create_internal_subset", create_internal_subset, 3); - rb_define_method(cNokogiriXmlNode, "create_external_subset", create_external_subset, 3); - rb_define_method(cNokogiriXmlNode, "pointer_id", pointer_id, 0); - rb_define_method(cNokogiriXmlNode, "line", line, 0); - 
rb_define_method(cNokogiriXmlNode, "line=", set_line, 1); - rb_define_method(cNokogiriXmlNode, "content", get_native_content, 0); - rb_define_method(cNokogiriXmlNode, "native_content=", set_native_content, 1); - rb_define_method(cNokogiriXmlNode, "lang", get_lang, 0); - rb_define_method(cNokogiriXmlNode, "lang=", set_lang, 1); - rb_define_private_method(cNokogiriXmlNode, "process_xincludes", process_xincludes, 1); - rb_define_private_method(cNokogiriXmlNode, "in_context", in_context, 2); rb_define_private_method(cNokogiriXmlNode, "add_child_node", add_child, 1); - rb_define_private_method(cNokogiriXmlNode, "add_previous_sibling_node", add_previous_sibling, 1); rb_define_private_method(cNokogiriXmlNode, "add_next_sibling_node", add_next_sibling, 1); - rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1); + rb_define_private_method(cNokogiriXmlNode, "add_previous_sibling_node", add_previous_sibling, 1); + rb_define_private_method(cNokogiriXmlNode, "compare", compare, 1); rb_define_private_method(cNokogiriXmlNode, "dump_html", dump_html, 0); - rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4); rb_define_private_method(cNokogiriXmlNode, "get", get, 1); + rb_define_private_method(cNokogiriXmlNode, "in_context", in_context, 2); + rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4); + rb_define_private_method(cNokogiriXmlNode, "process_xincludes", process_xincludes, 1); + rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1); rb_define_private_method(cNokogiriXmlNode, "set", set, 2); rb_define_private_method(cNokogiriXmlNode, "set_namespace", set_namespace, 1); - rb_define_private_method(cNokogiriXmlNode, "compare", compare, 1); id_decorate = rb_intern("decorate"); id_decorate_bang = rb_intern("decorate!"); } - -/* vim: set noet sw=4 sws=4 */ diff --git a/ext/nokogiri/xml_node_set.c b/ext/nokogiri/xml_node_set.c index 1c17c2affd..66f8780d5a 100644 --- 
a/ext/nokogiri/xml_node_set.c +++ b/ext/nokogiri/xml_node_set.c @@ -20,20 +20,20 @@ ruby_object_get(xmlNodePtr c_node) { /* see xmlElementType in libxml2 tree.h */ switch (c_node->type) { - case XML_NAMESPACE_DECL: - /* _private is later in the namespace struct */ - return (VALUE)(((xmlNsPtr)c_node)->_private); - - case XML_DOCUMENT_NODE: - case XML_HTML_DOCUMENT_NODE: - /* in documents we use _private to store a tuple */ - if (DOC_RUBY_OBJECT_TEST(((xmlDocPtr)c_node))) { - return DOC_RUBY_OBJECT((xmlDocPtr)c_node); - } - return (VALUE)NULL; - - default: - return (VALUE)(c_node->_private); + case XML_NAMESPACE_DECL: + /* _private is later in the namespace struct */ + return (VALUE)(((xmlNsPtr)c_node)->_private); + + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + /* in documents we use _private to store a tuple */ + if (DOC_RUBY_OBJECT_TEST(((xmlDocPtr)c_node))) { + return DOC_RUBY_OBJECT((xmlDocPtr)c_node); + } + return (VALUE)NULL; + + default: + return (VALUE)(c_node->_private); } } @@ -156,7 +156,7 @@ push(VALUE self, VALUE rb_node) Check_Node_Set_Node_Type(rb_node); Data_Get_Struct(self, xmlNodeSet, node_set); - Data_Get_Struct(rb_node, xmlNode, node); + Noko_Node_Get_Struct(rb_node, xmlNode, node); xmlXPathNodeSetAdd(node_set, node); @@ -179,7 +179,7 @@ delete (VALUE self, VALUE rb_node) Check_Node_Set_Node_Type(rb_node); Data_Get_Struct(self, xmlNodeSet, node_set); - Data_Get_Struct(rb_node, xmlNode, node); + Noko_Node_Get_Struct(rb_node, xmlNode, node); if (xmlXPathNodeSetContains(node_set, node)) { xpath_node_set_del(node_set, node); @@ -228,7 +228,7 @@ include_eh(VALUE self, VALUE rb_node) Check_Node_Set_Node_Type(rb_node); Data_Get_Struct(self, xmlNodeSet, node_set); - Data_Get_Struct(rb_node, xmlNode, node); + Noko_Node_Get_Struct(rb_node, xmlNode, node); return (xmlXPathNodeSetContains(node_set, node) ? 
Qtrue : Qfalse); } @@ -373,12 +373,12 @@ slice(int argc, VALUE *argv, VALUE self) /* if arg is Range */ switch (rb_range_beg_len(arg, &beg, &len, (long)node_set->nodeNr, 0)) { - case Qfalse: - break; - case Qnil: - return Qnil; - default: - return subseq(self, beg, len); + case Qfalse: + break; + case Qnil: + return Qnil; + default: + return subseq(self, beg, len); } return index_at(self, NUM2LONG(arg)); @@ -430,7 +430,7 @@ unlink_nodeset(VALUE self) xmlNodePtr node_ptr; node = noko_xml_node_wrap(Qnil, node_set->nodeTab[j]); rb_funcall(node, rb_intern("unlink"), 0); /* modifies the C struct out from under the object */ - Data_Get_Struct(node, xmlNode, node_ptr); + Noko_Node_Get_Struct(node, xmlNode, node_ptr); node_set->nodeTab[j] = node_ptr ; } } diff --git a/ext/nokogiri/xml_reader.c b/ext/nokogiri/xml_reader.c index d2c6a7f369..022f8ff531 100644 --- a/ext/nokogiri/xml_reader.c +++ b/ext/nokogiri/xml_reader.c @@ -31,6 +31,7 @@ has_attributes(xmlTextReaderPtr reader) return (0); } +// TODO: merge this function into the `namespaces` method implementation static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash) { @@ -148,9 +149,13 @@ namespaces(VALUE self) } /* - * @overload attribute_nodes() - * Get the attributes of the current node as an Array of Attr - * @return [Array] + :call-seq: attribute_nodes() → Array + + Get the attributes of the current node as an Array of XML:Attr + + ⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri. 
+ + See related: #attribute_hash, #attributes */ static VALUE rb_xml_reader_attribute_nodes(VALUE rb_reader) @@ -160,6 +165,10 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader) VALUE attr_nodes; int j; + // TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598 + // After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c + NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead."); + Data_Get_Struct(rb_reader, xmlTextReader, c_reader); if (! has_attributes(c_reader)) { @@ -181,6 +190,47 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader) return attr_nodes; } +/* + :call-seq: attribute_hash() → Hash + + Get the attributes of the current node as a Hash of names and values. + + See related: #attributes and #namespaces + */ +static VALUE +rb_xml_reader_attribute_hash(VALUE rb_reader) +{ + VALUE rb_attributes = rb_hash_new(); + xmlTextReaderPtr c_reader; + xmlNodePtr c_node; + xmlAttrPtr c_property; + + Data_Get_Struct(rb_reader, xmlTextReader, c_reader); + + if (!has_attributes(c_reader)) { + return rb_attributes; + } + + c_node = xmlTextReaderExpand(c_reader); + c_property = c_node->properties; + while (c_property != NULL) { + VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name); + VALUE rb_value = Qnil; + xmlChar *c_value = xmlNodeGetContent((xmlNode *)c_property); + + if (c_value) { + rb_value = NOKOGIRI_STR_NEW2(c_value); + xmlFree(c_value); + } + + rb_hash_aset(rb_attributes, rb_name, rb_value); + + c_property = c_property->next; + } + + return rb_attributes; +} + /* * call-seq: * attribute_at(index) @@ -414,16 +464,23 @@ name(VALUE self) * Get the xml:base of the node */ static VALUE -base_uri(VALUE self) +rb_xml_reader_base_uri(VALUE rb_reader) { - xmlTextReaderPtr reader; - const char *base_uri; + VALUE rb_base_uri; + xmlTextReaderPtr c_reader; + xmlChar *c_base_uri; - 
Data_Get_Struct(self, xmlTextReader, reader); - base_uri = (const char *)xmlTextReaderBaseUri(reader); - if (base_uri == NULL) { return Qnil; } + Data_Get_Struct(rb_reader, xmlTextReader, c_reader); + + c_base_uri = xmlTextReaderBaseUri(c_reader); + if (c_base_uri == NULL) { + return Qnil; + } + + rb_base_uri = NOKOGIRI_STR_NEW2(c_base_uri); + xmlFree(c_base_uri); - return NOKOGIRI_STR_NEW2(base_uri); + return rb_base_uri; } /* @@ -652,6 +709,24 @@ empty_element_p(VALUE self) return Qfalse; } +static VALUE +rb_xml_reader_encoding(VALUE rb_reader) +{ + xmlTextReaderPtr c_reader; + const char *parser_encoding; + VALUE constructor_encoding; + + constructor_encoding = rb_iv_get(rb_reader, "@encoding"); + if (RTEST(constructor_encoding)) { + return constructor_encoding; + } + + Data_Get_Struct(rb_reader, xmlTextReader, c_reader); + parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader); + if (parser_encoding == NULL) { return Qnil; } + return NOKOGIRI_STR_NEW2(parser_encoding); +} + void noko_init_xml_reader() { @@ -662,6 +737,8 @@ noko_init_xml_reader() */ cNokogiriXmlReader = rb_define_class_under(mNokogiriXml, "Reader", rb_cObject); + rb_undef_alloc_func(cNokogiriXmlReader); + rb_define_singleton_method(cNokogiriXmlReader, "from_memory", from_memory, -1); rb_define_singleton_method(cNokogiriXmlReader, "from_io", from_io, -1); @@ -669,11 +746,13 @@ noko_init_xml_reader() rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1); rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0); rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0); + rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0); rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0); - rb_define_method(cNokogiriXmlReader, "base_uri", base_uri, 0); + rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0); rb_define_method(cNokogiriXmlReader, "default?", 
default_eh, 0); rb_define_method(cNokogiriXmlReader, "depth", depth, 0); rb_define_method(cNokogiriXmlReader, "empty_element?", empty_element_p, 0); + rb_define_method(cNokogiriXmlReader, "encoding", rb_xml_reader_encoding, 0); rb_define_method(cNokogiriXmlReader, "inner_xml", inner_xml, 0); rb_define_method(cNokogiriXmlReader, "lang", lang, 0); rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0); diff --git a/ext/nokogiri/xml_sax_parser.c b/ext/nokogiri/xml_sax_parser.c index 1e4a293ff0..5d953be6c2 100644 --- a/ext/nokogiri/xml_sax_parser.c +++ b/ext/nokogiri/xml_sax_parser.c @@ -30,12 +30,12 @@ start_document(void *ctx) version = ctxt->version ? NOKOGIRI_STR_NEW2(ctxt->version) : Qnil; switch (ctxt->standalone) { - case 0: - standalone = NOKOGIRI_STR_NEW2("no"); - break; - case 1: - standalone = NOKOGIRI_STR_NEW2("yes"); - break; + case 0: + standalone = NOKOGIRI_STR_NEW2("no"); + break; + case 1: + standalone = NOKOGIRI_STR_NEW2("yes"); + break; } rb_funcall(doc, id_xmldecl, 3, version, encoding, standalone); diff --git a/ext/nokogiri/xml_sax_parser_context.c b/ext/nokogiri/xml_sax_parser_context.c index 35b4f320e9..60c491984b 100644 --- a/ext/nokogiri/xml_sax_parser_context.c +++ b/ext/nokogiri/xml_sax_parser_context.c @@ -2,6 +2,8 @@ VALUE cNokogiriXmlSaxParserContext ; +static ID id_read; + static void deallocate(xmlParserCtxtPtr ctxt) { @@ -26,6 +28,10 @@ parse_io(VALUE klass, VALUE io, VALUE encoding) xmlParserCtxtPtr ctxt; xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding); + if (!rb_respond_to(io, id_read)) { + rb_raise(rb_eTypeError, "argument expected to respond to :read"); + } + ctxt = xmlCreateIOParserCtxt(NULL, NULL, (xmlInputReadCallback)noko_io_read, (xmlInputCloseCallback)noko_io_close, @@ -62,9 +68,8 @@ parse_memory(VALUE klass, VALUE data) { xmlParserCtxtPtr ctxt; - if (NIL_P(data)) { - rb_raise(rb_eArgError, "data cannot be nil"); - } + Check_Type(data, T_STRING); + if (!(int)RSTRING_LEN(data)) { rb_raise(rb_eRuntimeError, 
"data cannot be empty"); } @@ -265,6 +270,8 @@ noko_init_xml_sax_parser_context() { cNokogiriXmlSaxParserContext = rb_define_class_under(mNokogiriXmlSax, "ParserContext", rb_cObject); + rb_undef_alloc_func(cNokogiriXmlSaxParserContext); + rb_define_singleton_method(cNokogiriXmlSaxParserContext, "io", parse_io, 2); rb_define_singleton_method(cNokogiriXmlSaxParserContext, "memory", parse_memory, 1); rb_define_singleton_method(cNokogiriXmlSaxParserContext, "file", parse_file, 1); @@ -276,4 +283,6 @@ noko_init_xml_sax_parser_context() rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0); rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0); rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0); + + id_read = rb_intern("read"); } diff --git a/ext/nokogiri/xml_schema.c b/ext/nokogiri/xml_schema.c index f0e980c878..2707cae235 100644 --- a/ext/nokogiri/xml_schema.c +++ b/ext/nokogiri/xml_schema.c @@ -25,7 +25,7 @@ validate_document(VALUE self, VALUE document) VALUE errors; Data_Get_Struct(self, xmlSchema, schema); - Data_Get_Struct(document, xmlDoc, doc); + Noko_Node_Get_Struct(document, xmlDoc, doc); errors = rb_ary_new(); @@ -179,7 +179,7 @@ has_blank_nodes_p(VALUE cache) for (i = 0; i < RARRAY_LEN(cache); i++) { xmlNodePtr node; VALUE element = rb_ary_entry(cache, i); - Data_Get_Struct(element, xmlNode, node); + Noko_Node_Get_Struct(element, xmlNode, node); if (xmlIsBlankNode(node)) { return 1; } @@ -210,7 +210,7 @@ from_document(int argc, VALUE *argv, VALUE klass) scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options); - Data_Get_Struct(document, xmlDoc, doc); + Noko_Node_Get_Struct(document, xmlDoc, doc); doc = doc->doc; /* In case someone passes us a node. ugh. 
*/ if (scanned_args == 1) { @@ -274,6 +274,8 @@ noko_init_xml_schema() { cNokogiriXmlSchema = rb_define_class_under(mNokogiriXml, "Schema", rb_cObject); + rb_undef_alloc_func(cNokogiriXmlSchema); + rb_define_singleton_method(cNokogiriXmlSchema, "read_memory", read_memory, -1); rb_define_singleton_method(cNokogiriXmlSchema, "from_document", from_document, -1); diff --git a/ext/nokogiri/xml_text.c b/ext/nokogiri/xml_text.c index eb78e2373c..6ba75e450b 100644 --- a/ext/nokogiri/xml_text.c +++ b/ext/nokogiri/xml_text.c @@ -20,7 +20,7 @@ new (int argc, VALUE *argv, VALUE klass) rb_scan_args(argc, argv, "2*", &string, &document, &rest); - Data_Get_Struct(document, xmlDoc, doc); + Noko_Node_Get_Struct(document, xmlDoc, doc); node = xmlNewText((xmlChar *)StringValueCStr(string)); node->doc = doc->doc; diff --git a/ext/nokogiri/xml_xpath_context.c b/ext/nokogiri/xml_xpath_context.c index 805b1dd507..5483bd6f9b 100644 --- a/ext/nokogiri/xml_xpath_context.c +++ b/ext/nokogiri/xml_xpath_context.c @@ -86,6 +86,26 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs) xmlXPathFreeObject(needle); } + +/* xmlXPathFunction to select nodes whose local name matches, for HTML5 CSS queries that should ignore namespaces */ +static void +xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs) +{ + xmlXPathObjectPtr element_name; + + assert(ctxt->context->node); + + CHECK_ARITY(1); + CAST_TO_STRING; + CHECK_TYPE(XPATH_STRING); + element_name = valuePop(ctxt); + + valuePush(ctxt, xmlXPathNewBoolean(xmlStrEqual(ctxt->context->node->name, element_name->stringval))); + + xmlXPathFreeObject(element_name); +} + + /* * call-seq: * register_ns(prefix, uri) @@ -128,11 +148,44 @@ register_variable(VALUE self, VALUE name, VALUE value) return self; } + +/* + * convert an XPath object into a Ruby object of the appropriate type. + * returns Qundef if no conversion was possible. 
+ */ +static VALUE +xpath2ruby(xmlXPathObjectPtr xobj, xmlXPathContextPtr xctx) +{ + VALUE retval; + + assert(xctx->doc); + assert(DOC_RUBY_OBJECT_TEST(xctx->doc)); + + switch (xobj->type) { + case XPATH_STRING: + retval = NOKOGIRI_STR_NEW2(xobj->stringval); + xmlFree(xobj->stringval); + return retval; + + case XPATH_NODESET: + return noko_xml_node_set_wrap(xobj->nodesetval, + DOC_RUBY_OBJECT(xctx->doc)); + + case XPATH_NUMBER: + return rb_float_new(xobj->floatval); + + case XPATH_BOOLEAN: + return (xobj->boolval == 1) ? Qtrue : Qfalse; + + default: + return Qundef; + } +} + void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler, const char *function_name) { - int i; VALUE result, doc; VALUE *argv; VALUE node_set = Qnil; @@ -143,81 +196,66 @@ Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, i assert(DOC_RUBY_OBJECT_TEST(ctx->context->doc)); argv = (VALUE *)calloc((size_t)nargs, sizeof(VALUE)); - for (i = 0 ; i < nargs ; ++i) { - rb_gc_register_address(&argv[i]); + for (int j = 0 ; j < nargs ; ++j) { + rb_gc_register_address(&argv[j]); } doc = DOC_RUBY_OBJECT(ctx->context->doc); - if (nargs > 0) { - i = nargs - 1; - do { - obj = valuePop(ctx); - switch (obj->type) { - case XPATH_STRING: - argv[i] = NOKOGIRI_STR_NEW2(obj->stringval); - break; - case XPATH_BOOLEAN: - argv[i] = obj->boolval == 1 ? 
Qtrue : Qfalse; - break; - case XPATH_NUMBER: - argv[i] = rb_float_new(obj->floatval); - break; - case XPATH_NODESET: - argv[i] = noko_xml_node_set_wrap(obj->nodesetval, doc); - break; - default: - argv[i] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj)); - } - xmlXPathFreeNodeSetList(obj); - } while (i-- > 0); + for (int j = nargs - 1 ; j >= 0 ; --j) { + obj = valuePop(ctx); + argv[j] = xpath2ruby(obj, ctx->context); + if (argv[j] == Qundef) { + argv[j] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(obj)); + } + xmlXPathFreeNodeSetList(obj); } result = rb_funcall2(handler, rb_intern((const char *)function_name), nargs, argv); - for (i = 0 ; i < nargs ; ++i) { - rb_gc_unregister_address(&argv[i]); + for (int j = 0 ; j < nargs ; ++j) { + rb_gc_unregister_address(&argv[j]); } free(argv); switch (TYPE(result)) { - case T_FLOAT: - case T_BIGNUM: - case T_FIXNUM: - xmlXPathReturnNumber(ctx, NUM2DBL(result)); - break; - case T_STRING: - xmlXPathReturnString( - ctx, - xmlCharStrdup(StringValueCStr(result)) - ); - break; - case T_TRUE: - xmlXPathReturnTrue(ctx); - break; - case T_FALSE: - xmlXPathReturnFalse(ctx); - break; - case T_NIL: - break; - case T_ARRAY: { - VALUE args[2]; - args[0] = doc; - args[1] = result; - node_set = rb_class_new_instance(2, args, cNokogiriXmlNodeSet); - Data_Get_Struct(node_set, xmlNodeSet, xml_node_set); - xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set)); - } - break; - case T_DATA: - if (rb_obj_is_kind_of(result, cNokogiriXmlNodeSet)) { - Data_Get_Struct(result, xmlNodeSet, xml_node_set); - /* Copy the node set, otherwise it will get GC'd. 
*/ - xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set)); + case T_FLOAT: + case T_BIGNUM: + case T_FIXNUM: + xmlXPathReturnNumber(ctx, NUM2DBL(result)); + break; + case T_STRING: + xmlXPathReturnString( + ctx, + xmlCharStrdup(StringValueCStr(result)) + ); break; + case T_TRUE: + xmlXPathReturnTrue(ctx); + break; + case T_FALSE: + xmlXPathReturnFalse(ctx); + break; + case T_NIL: + break; + case T_ARRAY: { + VALUE args[2]; + args[0] = doc; + args[1] = result; + node_set = rb_class_new_instance(2, args, cNokogiriXmlNodeSet); + Data_Get_Struct(node_set, xmlNodeSet, xml_node_set); + xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set)); } - default: - rb_raise(rb_eRuntimeError, "Invalid return type"); + break; + case T_DATA: + if (rb_obj_is_kind_of(result, cNokogiriXmlNodeSet)) { + Data_Get_Struct(result, xmlNodeSet, xml_node_set); + /* Copy the node set, otherwise it will get GC'd. */ + xmlXPathReturnNodeSet(ctx, xmlXPathNodeSetMerge(NULL, xml_node_set)); + break; + } + default: + rb_raise(rb_eRuntimeError, "Invalid return type"); } } @@ -275,7 +313,7 @@ static VALUE evaluate(int argc, VALUE *argv, VALUE self) { VALUE search_path, xpath_handler; - VALUE thing = Qnil; + VALUE retval = Qnil; xmlXPathContextPtr ctx; xmlXPathObjectPtr xpath; xmlChar *query; @@ -310,31 +348,14 @@ evaluate(int argc, VALUE *argv, VALUE self) rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error)); } - assert(ctx->doc); - assert(DOC_RUBY_OBJECT_TEST(ctx->doc)); - - switch (xpath->type) { - case XPATH_STRING: - thing = NOKOGIRI_STR_NEW2(xpath->stringval); - xmlFree(xpath->stringval); - break; - case XPATH_NODESET: - thing = noko_xml_node_set_wrap(xpath->nodesetval, - DOC_RUBY_OBJECT(ctx->doc)); - break; - case XPATH_NUMBER: - thing = rb_float_new(xpath->floatval); - break; - case XPATH_BOOLEAN: - thing = xpath->boolval == 1 ? 
Qtrue : Qfalse; - break; - default: - thing = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc)); + retval = xpath2ruby(xpath, ctx); + if (retval == Qundef) { + retval = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(ctx->doc)); } xmlXPathFreeNodeSetList(xpath); - return thing; + return retval; } /* @@ -350,9 +371,12 @@ new (VALUE klass, VALUE nodeobj) xmlXPathContextPtr ctx; VALUE self; - Data_Get_Struct(nodeobj, xmlNode, node); + Noko_Node_Get_Struct(nodeobj, xmlNode, node); +#if LIBXML_VERSION < 21000 + /* deprecated in 40483d0 */ xmlXPathInit(); +#endif ctx = xmlXPathNewContext(node->doc); ctx->node = node; @@ -360,6 +384,8 @@ new (VALUE klass, VALUE nodeobj) xmlXPathRegisterNs(ctx, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI); xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI, xpath_builtin_css_class); + xmlXPathRegisterFuncNS(ctx, (const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI, + xpath_builtin_local_name_is); self = Data_Wrap_Struct(klass, 0, deallocate, ctx); return self; @@ -373,6 +399,8 @@ noko_init_xml_xpath_context(void) */ cNokogiriXmlXpathContext = rb_define_class_under(mNokogiriXml, "XPathContext", rb_cObject); + rb_undef_alloc_func(cNokogiriXmlXpathContext); + rb_define_singleton_method(cNokogiriXmlXpathContext, "new", new, 1); rb_define_method(cNokogiriXmlXpathContext, "evaluate", evaluate, -1); diff --git a/ext/nokogiri/xslt_stylesheet.c b/ext/nokogiri/xslt_stylesheet.c index 146550f97b..0b75886d38 100644 --- a/ext/nokogiri/xslt_stylesheet.c +++ b/ext/nokogiri/xslt_stylesheet.c @@ -64,7 +64,6 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj) VALUE errstr, exception; xsltStylesheetPtr ss ; Data_Get_Struct(xmldocobj, xmlDoc, xml); - exsltRegisterAll(); errstr = rb_str_new(0, 0); xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler); @@ -108,19 +107,117 @@ serialize(VALUE self, VALUE xmlobj) } /* - * call-seq: - * transform(document, params = []) + * call-seq: + * transform(document) + * 
transform(document, params = {}) + * + * Apply an XSLT stylesheet to an XML::Document. + * + * [Parameters] + * - +document+ (Nokogiri::XML::Document) the document to be transformed. + * - +params+ (Hash, Array) strings used as XSLT parameters. + * + * [Returns] Nokogiri::XML::Document + * + * *Example* of basic transformation: + * + * xslt = <<~XSLT + * + * + * + * + * + * + * + *

+ *
    + * + *
  1. + *
    + *
+ * + * + *
+ * XSLT + * + * xml = <<~XML + * + * + * + * EMP0001 + * Accountant + * + * + * EMP0002 + * Developer + * + * + * XML + * + * doc = Nokogiri::XML::Document.parse(xml) + * stylesheet = Nokogiri::XSLT.parse(xslt) * - * Apply an XSLT stylesheet to an XML::Document. - * +params+ is an array of strings used as XSLT parameters. - * returns Nokogiri::XML::Document + * ⚠ Note that the +h1+ element is empty because no param has been provided! * - * Example: + * stylesheet.transform(doc).to_xml + * # => "\n" + + * # "

\n" + + * # "
    \n" + + * # "
  1. EMP0001
  2. \n" + + * # "
  3. EMP0002
  4. \n" + + * # "
\n" + + * # "\n" * - * doc = Nokogiri::XML(File.read(ARGV[0])) - * xslt = Nokogiri::XSLT(File.read(ARGV[1])) - * puts xslt.transform(doc, ['key', 'value']) + * *Example* of using an input parameter hash: * + * ⚠ The title is populated, but note how we need to quote-escape the value. + * + * stylesheet.transform(doc, { "title" => "'Employee List'" }).to_xml + * # => "\n" + + * # "

Employee List

\n" + + * # "
    \n" + + * # "
  1. EMP0001
  2. \n" + + * # "
  3. EMP0002
  4. \n" + + * # "
\n" + + * # "\n" + * + * *Example* using the XSLT.quote_params helper method to safely quote-escape strings: + * + * stylesheet.transform(doc, Nokogiri::XSLT.quote_params({ "title" => "Aaron's List" })).to_xml + * # => "\n" + + * # "

Aaron's List

\n" + + * # "
    \n" + + * # "
  1. EMP0001
  2. \n" + + * # "
  3. EMP0002
  4. \n" + + * # "
\n" + + * # "\n" + * + * *Example* using an array of XSLT parameters + * + * You can also use an array if you want to. + * + * stylesheet.transform(doc, ["title", "'Employee List'"]).to_xml + * # => "\n" + + * # "

Employee List

\n" + + * # "
    \n" + + * # "
  1. EMP0001
  2. \n" + + * # "
  3. EMP0002
  4. \n" + + * # "
\n" + + * # "\n" + * + * Or pass an array to XSLT.quote_params: + * + * stylesheet.transform(doc, Nokogiri::XSLT.quote_params(["title", "Aaron's List"])).to_xml + * # => "\n" + + * # "

Aaron's List

\n" + + * # "
    \n" + + * # "
  1. EMP0001
  2. \n" + + * # "
  3. EMP0002
  4. \n" + + * # "
\n" + + * # "\n" + * + * See: Nokogiri::XSLT.quote_params */ static VALUE transform(int argc, VALUE *argv, VALUE self) @@ -257,6 +354,8 @@ noko_init_xslt_stylesheet() cNokogiriXsltStylesheet = rb_define_class_under(mNokogiriXslt, "Stylesheet", rb_cObject); + rb_undef_alloc_func(cNokogiriXsltStylesheet); + rb_define_singleton_method(cNokogiriXsltStylesheet, "parse_stylesheet_doc", parse_stylesheet_doc, 1); rb_define_method(cNokogiriXsltStylesheet, "serialize", serialize, 1); rb_define_method(cNokogiriXsltStylesheet, "transform", transform, -1); diff --git a/gumbo-parser/.gitignore b/gumbo-parser/.gitignore new file mode 100644 index 0000000000..3d04bd296b --- /dev/null +++ b/gumbo-parser/.gitignore @@ -0,0 +1,3 @@ +build +googletest +src/*.o diff --git a/gumbo-parser/CHANGES.md b/gumbo-parser/CHANGES.md new file mode 100644 index 0000000000..277b3a2bbe --- /dev/null +++ b/gumbo-parser/CHANGES.md @@ -0,0 +1,63 @@ +## Gumbo 0.10.1 (2015-04-30) + +Same as 0.10.0, but with the version number bumped because the last version-number commit to v0.9.4 makes GitHub think that v0.9.4 is the latest version and so it's not highlighted on the webpage. + +## Gumbo 0.10.0 (2015-04-30) + +* Full support for `