Skip to content

Commit

Permalink
Fix up multiple sites via test run
Browse files Browse the repository at this point in the history
Closes #312
Closes #284
  • Loading branch information
stefanw committed Jan 27, 2024
1 parent a8cb534 commit 9a90bed
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 107 deletions.
8 changes: 0 additions & 8 deletions src/extractor.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import { ArticleInfo, ExtractorInterface, FormattedDateRange, RawArticleInfo, Site } from './types.js'

// Matches a span that begins and ends with either an ASCII double quote (")
// or a low double quote („). The `.*` in between is greedy and does not cross
// line terminators, so on a single line the match runs from the first such
// quote character to the last one.
const QUOTES = /["„].*["„]/

class Extractor implements ExtractorInterface {
site: Site
root: HTMLElement
Expand Down Expand Up
@@ -155,12 +153,6 @@ class Extractor implements ExtractorInterface {
articleInfo[key] = result
}
}
let q = articleInfo.query
// remove some special chars
q = q.replace(/[!:?;'/()]/g, ' ').replace(/(((?<!\d)[,.])|([,.](?!\d)))/g, ' ').replace(/ {1,}/g, ' ')
// remove non-leading/trailing quotes
q = q.split(QUOTES).map(s => s.trim()).filter(s => s.split(' ').length > 1).map(s => `"${s}"`).join(' ')
articleInfo.query = q
return {
query: articleInfo.query,
edition: articleInfo.edition,
Expand Down
Loading

0 comments on commit 9a90bed

Please sign in to comment.