Commit

Add Chapter 6 code.
dom96 committed Jun 19, 2016
1 parent 10d8a4a commit 7a57702
Showing 9 changed files with 308 additions and 0 deletions.
79 changes: 79 additions & 0 deletions Chapter6/WikipediaStats/concurrency.nim
@@ -0,0 +1,79 @@
# See this page for info about the format https://wikitech.wikimedia.org/wiki/Analytics/Data/Pagecounts-all-sites
import tables, parseutils, strutils, threadpool

const filename = "pagecounts-20160101-050000"

type
  Stats = ref object
    projectName, pageTitle: string
    requests, contentSize: int

proc `$`(stats: Stats): string =
  "(projectName: $#, pageTitle: $#, requests: $#, contentSize: $#)" % [
    stats.projectName, stats.pageTitle, $stats.requests, $stats.contentSize
  ]

proc parse(chunk: string): Stats =
  # Each line looks like: en Main_Page 242332 4737756101
  result = Stats(projectName: "", pageTitle: "", requests: 0, contentSize: 0)

  var projectName = ""
  var pageTitle = ""
  var requests = ""
  var contentSize = ""
  for line in chunk.splitLines:
    var i = 0
    projectName.setLen(0)
    i.inc parseUntil(line, projectName, Whitespace, i)
    i.inc skipWhitespace(line, i)
    pageTitle.setLen(0)
    i.inc parseUntil(line, pageTitle, Whitespace, i)
    i.inc skipWhitespace(line, i)
    requests.setLen(0)
    i.inc parseUntil(line, requests, Whitespace, i)
    i.inc skipWhitespace(line, i)
    contentSize.setLen(0)
    i.inc parseUntil(line, contentSize, Whitespace, i)
    i.inc skipWhitespace(line, i)

    if requests.len == 0 or contentSize.len == 0:
      # Ignore lines where either of these fields is empty.
      continue

    let requestsInt = requests.parseInt
    if requestsInt > result.requests and projectName == "en":
      result = Stats(
        projectName: projectName,
        pageTitle: pageTitle,
        requests: requestsInt,
        contentSize: contentSize.parseInt
      )

proc readChunks(filename: string, chunksize = 1000000): Stats =
  result = Stats(projectName: "", pageTitle: "", requests: 0, contentSize: 0)
  var file = open(filename)
  var responses = newSeq[FlowVar[Stats]]()
  var buffer = newString(chunksize)
  var oldBufferLen = 0
  while not endOfFile(file):
    let readSize = file.readChars(buffer, oldBufferLen, chunksize - oldBufferLen) + oldBufferLen
    var chunkLen = readSize

    while chunkLen >= 0 and buffer[chunkLen - 1] notin NewLines:
      # Find where the last line ends
      chunkLen.dec

    responses.add(spawn parse(buffer[0 .. <chunkLen]))
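    # Carry the incomplete trailing line over to the front of the buffer;
    # the next readChars call appends new data right after it.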
    oldBufferLen = readSize - chunkLen
    buffer[0 .. <oldBufferLen] = buffer[readSize - oldBufferLen .. ^1]

  for resp in responses:
    let statistic = ^resp
    if statistic.requests > result.requests:
      result = statistic

  file.close()


when isMainModule:
  echo readChunks(filename)
1 change: 1 addition & 0 deletions Chapter6/WikipediaStats/concurrency.nim.cfg
@@ -0,0 +1 @@
--threads:on
64 changes: 64 additions & 0 deletions Chapter6/WikipediaStats/concurrency_regex.nim
@@ -0,0 +1,64 @@
# See this page for info about the format https://wikitech.wikimedia.org/wiki/Analytics/Data/Pagecounts-all-sites
import tables, parseutils, strutils, threadpool, re

const filename = "pagecounts-20160101-050000"

type
  Stats = ref object
    projectName, pageTitle: string
    requests, contentSize: int

proc `$`(stats: Stats): string =
  "(projectName: $#, pageTitle: $#, requests: $#, contentSize: $#)" % [
    stats.projectName, stats.pageTitle, $stats.requests, $stats.contentSize
  ]

proc parse(chunk: string): Stats =
  # Each line looks like: en Main_Page 242332 4737756101
  result = Stats(projectName: "", pageTitle: "", requests: 0, contentSize: 0)

  var matches: array[4, string]
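  # Compile the regex once, outside the loop; its four capture groups are the
  # project name, page title, request count and content size.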
  var reg = re"([^\s]+)\s([^\s]+)\s(\d+)\s(\d+)"
  for line in chunk.splitLines:
    let start = find(line, reg, matches)
    if start == -1: continue

    let requestsInt = matches[2].parseInt
    if requestsInt > result.requests and matches[0] == "en":
      result = Stats(
        projectName: matches[0],
        pageTitle: matches[1],
        requests: requestsInt,
        contentSize: matches[3].parseInt
      )

proc readChunks(filename: string, chunksize = 1000000): Stats =
  result = Stats(projectName: "", pageTitle: "", requests: 0, contentSize: 0)
  var file = open(filename)
  var responses = newSeq[FlowVar[Stats]]()
  var buffer = newString(chunksize)
  var oldBufferLen = 0
  while not endOfFile(file):
    let readSize = file.readChars(buffer, oldBufferLen, chunksize - oldBufferLen) + oldBufferLen
    var chunkLen = readSize

    while chunkLen >= 0 and buffer[chunkLen - 1] notin NewLines:
      # Find where the last line ends
      chunkLen.dec

    responses.add(spawn parse(buffer[0 .. <chunkLen]))
    oldBufferLen = readSize - chunkLen
    buffer[0 .. <oldBufferLen] = buffer[readSize - oldBufferLen .. ^1]

  echo("Spawns: ", responses.len)
  for resp in responses:
    let statistic = ^resp
    if statistic.requests > result.requests:
      result = statistic

  file.close()


when isMainModule:
  echo readChunks(filename)
1 change: 1 addition & 0 deletions Chapter6/WikipediaStats/concurrency_regex.nim.cfg
@@ -0,0 +1 @@
--threads:on
29 changes: 29 additions & 0 deletions Chapter6/WikipediaStats/naive.nim
@@ -0,0 +1,29 @@
# See this page for info about the format https://wikitech.wikimedia.org/wiki/Analytics/Data/Pagecounts-all-sites
import tables, parseutils, strutils

const filename = "pagecounts-20150101-050000"

proc parse(filename: string): tuple[projectName, pageTitle: string,
                                    requests, contentSize: int] =
  # Each line looks like: en Main_Page 242332 4737756101
  var file = open(filename)
  for line in file.lines:
    var i = 0
    var projectName = ""
    i.inc parseUntil(line, projectName, Whitespace, i)
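    # inc(i) skips the single space that separates the fields.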
    i.inc
    var pageTitle = ""
    i.inc parseUntil(line, pageTitle, Whitespace, i)
    i.inc
    var requests = 0
    i.inc parseInt(line, requests, i)
    i.inc
    var contentSize = 0
    i.inc parseInt(line, contentSize, i)
    if requests > result[2] and projectName == "en":
      result = (projectName, pageTitle, requests, contentSize)

  file.close()

when isMainModule:
  echo parse(filename)
72 changes: 72 additions & 0 deletions Chapter6/WikipediaStats/parallel_counts.nim
@@ -0,0 +1,72 @@
import os, parseutils, threadpool, strutils

type
  Stats = ref object
    domainCode, pageTitle: string
    countViews, totalSize: int

proc newStats(): Stats =
  Stats(domainCode: "", pageTitle: "", countViews: 0, totalSize: 0)

proc `$`(stats: Stats): string =
  "(domainCode: $#, pageTitle: $#, countViews: $#, totalSize: $#)" % [
    stats.domainCode, stats.pageTitle, $stats.countViews, $stats.totalSize
  ]

proc parse(line: string, domainCode, pageTitle: var string,
           countViews, totalSize: var int) =
  if line.len == 0: return
  var i = 0
  domainCode.setLen(0)
  i.inc parseUntil(line, domainCode, {' '}, i)
  i.inc
  pageTitle.setLen(0)
  i.inc parseUntil(line, pageTitle, {' '}, i)
  i.inc
  countViews = 0
  i.inc parseInt(line, countViews, i)
  i.inc
  totalSize = 0
  i.inc parseInt(line, totalSize, i)

proc parseChunk(chunk: string): Stats =
  result = newStats()
  var domainCode = ""
  var pageTitle = ""
  var countViews = 0
  var totalSize = 0
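  # The same string buffers are reused for every line (parse calls setLen(0)
  # on them), avoiding a fresh allocation per line.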
  for line in splitLines(chunk):
    parse(line, domainCode, pageTitle, countViews, totalSize)
    if domainCode == "en" and countViews > result.countViews:
      result = Stats(domainCode: domainCode, pageTitle: pageTitle,
                     countViews: countViews, totalSize: totalSize)

proc readPageCounts(filename: string, chunkSize = 1_000_000) =
  var file = open(filename)
  var responses = newSeq[FlowVar[Stats]]()
  var buffer = newString(chunkSize)
  var oldBufferLen = 0
  while not endOfFile(file):
    let reqSize = chunkSize - oldBufferLen
    let readSize = file.readChars(buffer, oldBufferLen, reqSize) + oldBufferLen
    var chunkLen = readSize

    while chunkLen >= 0 and buffer[chunkLen - 1] notin NewLines:
      chunkLen.dec

    responses.add(spawn parseChunk(buffer[0 .. <chunkLen]))
    oldBufferLen = readSize - chunkLen
    buffer[0 .. <oldBufferLen] = buffer[readSize - oldBufferLen .. ^1]

  var mostPopular = newStats()
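  # ^resp blocks until the corresponding spawned task has produced its result.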
  for resp in responses:
    let statistic = ^resp
    if statistic.countViews > mostPopular.countViews:
      mostPopular = statistic

  echo("Most popular is: ", mostPopular)

when isMainModule:
  const file = "pagecounts-20160101-050000"
  let filename = getCurrentDir() / file
  readPageCounts(filename)
13 changes: 13 additions & 0 deletions Chapter6/WikipediaStats/race_condition.nim
@@ -0,0 +1,13 @@
import threadpool

var counter = 0

proc increment(x: int) =
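  # A classic data race: both spawned tasks read and write the shared global
  # counter without any synchronisation, so increments can be lost and the
  # printed total is typically below 20000.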
  for i in 0 .. <x:
    let value = counter + 1
    counter = value

spawn increment(10_000)
spawn increment(10_000)
sync()
echo(counter)
34 changes: 34 additions & 0 deletions Chapter6/WikipediaStats/sequential_counts.nim
@@ -0,0 +1,34 @@
import os, parseutils

proc parse(line: string, domainCode, pageTitle: var string,
           countViews, totalSize: var int) =
  var i = 0
  domainCode.setLen(0)
  i.inc parseUntil(line, domainCode, {' '}, i)
  i.inc
  pageTitle.setLen(0)
  i.inc parseUntil(line, pageTitle, {' '}, i)
  i.inc
  countViews = 0
  i.inc parseInt(line, countViews, i)
  i.inc
  totalSize = 0
  i.inc parseInt(line, totalSize, i)

proc readPageCounts(filename: string) =
  var domainCode = ""
  var pageTitle = ""
  var countViews = 0
  var totalSize = 0
  var mostPopular = ("", "", 0, 0)
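  # Track the most viewed English page seen so far as
  # (domainCode, pageTitle, countViews, totalSize).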
  for line in filename.lines:
    parse(line, domainCode, pageTitle, countViews, totalSize)
    if domainCode == "en" and countViews > mostPopular[2]:
      mostPopular = (domainCode, pageTitle, countViews, totalSize)

  echo("Most popular is: ", mostPopular)

when isMainModule:
  const file = "pagecounts-20160101-050000"
  let filename = getCurrentDir() / file
  readPageCounts(filename)
15 changes: 15 additions & 0 deletions Chapter6/WikipediaStats/unguarded_access.nim
@@ -0,0 +1,15 @@
import threadpool, locks

var counterLock: Lock
initLock(counterLock)
var counter {.guard: counterLock.} = 0
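# The guard pragma ties counter to counterLock: accesses that do not happen
# inside a locks section, such as the unguarded reads and writes in increment
# below, are flagged by the compiler.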

proc increment(x: int) =
  for i in 0 .. <x:
    let value = counter + 1
    counter = value

spawn increment(10_000)
spawn increment(10_000)
sync()
echo(counter)
