Skip to content

Commit

Permalink
action and postscript updated
Browse files Browse the repository at this point in the history
  • Loading branch information
akosbalasko committed Aug 30, 2021
1 parent d532f1f commit ab217e1
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 3 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/scrape-scripts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
with:
http_url: https://forum.obsidian.md/t/dataview-plugin-snippet-showcase/13673
downloaded_filename: dataview-plugin-snippet-showcase.html
postprocess: ./scripts/scraper-post-process.ts


postprocess: ./scraper-post-process.ts
- name: Create Pull Request
uses: peter-evans/create-pull-request@v3

43 changes: 43 additions & 0 deletions scraper-post-process.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import { readJSON, writeJSON } from 'https://deno.land/x/flat/mod.ts'
import { cheerio } from "https://deno.land/x/[email protected]/mod.ts";

/**
 * One forum post extracted from the scraped topic page.
 *
 * Every field is optional so a partially populated record still type-checks.
 */
interface Post {
  /** Author of the post. */
  creator?: {
    /** Text of the author's name element. */
    name: string;
    /** Value of the author link's `href` attribute. */
    url: string;
  };
  /** Publication timestamp of the post. */
  datePublished?: Date;
  /** Inner HTML of the post body. */
  content?: string;
}

// Post-process step for the scraped forum topic: parse the downloaded HTML,
// extract every post, keep the ones that contain a dataview code fence and
// write each of them to its own JSON file.

// Path used when the script is started without arguments.
const DEFAULT_INPUT = './dataview-plugin-snippet-showcase.html';
// Directory that receives one JSON file per matching post.
const OUTPUT_DIR = './scripts';

// The filename is the first invocation argument (same name as
// `downloaded_filename` in the workflow). Fall back to the previous
// hard-coded path so a manual run without arguments keeps working.
const filename = Deno.args[0] ?? DEFAULT_INPUT;
const html = await Deno.readTextFile(filename);
const $ = cheerio.load(html);

const posts = $('div[class="topic-body crawler-post"]');
const rawData: Array<Post> = [];
// `each` is synchronous, so there is nothing to await here.
posts.each((_index, data) => {
  // Load the single post element so the selectors below are scoped to it.
  const post = cheerio.load(data);
  const creatorName = post('a[itemprop=url] > span[itemprop=name]');
  const creatorUrl = post('span[class=creator] > a[itemprop=url]');
  const publishedDate = post(
    'span[class=crawler-post-infos] > time[itemprop=datePublished]',
  );
  const publishDate = publishedDate.first().attr('datetime');
  const content = post('div[class=post]').html() ?? '';
  rawData.push({
    creator: {
      url: creatorUrl.first().attr('href') ?? '',
      name: creatorName.text(),
    },
    // A post without a `datetime` attribute is stamped with the current time.
    datePublished: publishDate ? new Date(publishDate) : new Date(),
    content,
  });
});

// NOTE(review): `content` is the post's HTML; confirm that the literal
// markdown fence is what actually appears there for dataview snippets.
const dataviewCodes = rawData.filter((post) =>
  post.content?.includes('```dataview')
);

// The output directory is not guaranteed to exist in a fresh checkout.
await Deno.mkdir(OUTPUT_DIR, { recursive: true });

for (const codePost of dataviewCodes) {
  // The creator name comes from scraped HTML: strip path separators so it
  // cannot redirect the write outside OUTPUT_DIR.
  const safeName = (codePost.creator?.name ?? '').replace(/[\\/]/g, '_');
  const fileName = `${safeName}_${codePost.datePublished?.getTime()}.json`;
  await writeJSON(`${OUTPUT_DIR}/${fileName}`, codePost);
}
console.log(JSON.stringify(dataviewCodes));
Empty file removed scripts/scraper-post-process.ts
Empty file.

0 comments on commit ab217e1

Please sign in to comment.