Skip to content

Commit

Permalink
Refactored algorithms @todo docs
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidBelicza committed Mar 8, 2018
1 parent ae9d20a commit f637912
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 132 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ func main() {
// Default Language for filtering stop words.
language := textrank.NewDefaultLanguage()
// Use a slightly more complex algorithm to rank the text.
algorithmMix := textrank.NewMixedAlgorithm()
algorithmMix := textrank.NewChainAlgorithm()

// Add text.
tr.Populate(rawText, language, rule)
Expand Down Expand Up @@ -304,7 +304,7 @@ func main() {
tr2 := textrank.NewTextRank()

// Use a slightly more complex algorithm to rank the text.
algorithmMix := textrank.NewMixedAlgorithm()
algorithmMix := textrank.NewChainAlgorithm()

// Add text to the second graph.
tr2.Populate(rawText, language, rule)
Expand Down
4 changes: 2 additions & 2 deletions doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ algorithm by Algorithm interface and use it instead of defaults.
// Default Language for filtering stop words.
language := textrank.NewDefaultLanguage()
// Use a slightly more complex algorithm to rank the text.
algorithmMix := textrank.NewMixedAlgorithm()
algorithmMix := textrank.NewChainAlgorithm()
// Add text.
tr.Populate(rawText, language, rule)
Expand Down Expand Up @@ -294,7 +294,7 @@ processes.
tr2 := textrank.NewTextRank()
// Use a slightly more complex algorithm to rank the text.
algorithmMix := textrank.NewMixedAlgorithm()
algorithmMix := textrank.NewChainAlgorithm()
// Add text to the second graph.
tr2.Populate(rawText, language, rule)
Expand Down
116 changes: 32 additions & 84 deletions rank/algorithm.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package rank

import (
"math"
"fmt"
)

// Algorithm interface and its methods make possible the polymorphic usage of
Expand All @@ -21,7 +20,7 @@ type Algorithm interface {
}

// AlgorithmDefault struct is the basic implementation of Algorithm. It can
// weight and normalize a word or phrase by comparing them.
// weight a word or phrase by comparing them.
type AlgorithmDefault struct{}

// NewAlgorithmDefault constructor retrieves an AlgorithmDefault pointer.
Expand All @@ -30,8 +29,7 @@ func NewAlgorithmDefault() *AlgorithmDefault {
}

// WeightingRelation method is the traditional algorithm of text rank to
// weighting and normalizing a phrase. It always retrieves a float number
// between 0.00 and 1.00.
// weight a phrase.
func (a *AlgorithmDefault) WeightingRelation(
word1ID int,
word2ID int,
Expand All @@ -46,8 +44,7 @@ func (a *AlgorithmDefault) WeightingRelation(
return float32(relationQty)
}

// WeightingHits method ranks the words by their number of usage. It always
// retrieves a float number between 0.00 and 1.00.
// WeightingHits method ranks the words by their occurrence.
func (a *AlgorithmDefault) WeightingHits(
wordID int,
rank *Rank,
Expand All @@ -61,103 +58,54 @@ func (a *AlgorithmDefault) WeightingHits(
return float32(weight)
}

// AlgorithmMixed struct is the combined implementation of Algorithm. A good
// example how weighting can be changed by a different implementations. It can
// weight and normalize a word or phrase by comparing them.
type AlgorithmMixed struct{}
// AlgorithmChain struct is the combined implementation of Algorithm. It is a
// good example of how weighting can be changed by a different implementation.
// It can weight a word or phrase by comparing them.
type AlgorithmChain struct{}

// NewAlgorithmMixed constructor retrieves an AlgorithmMixed pointer.
func NewAlgorithmMixed() *AlgorithmMixed {
return &AlgorithmMixed{}
// NewAlgorithmChain constructor retrieves an AlgorithmChain pointer.
func NewAlgorithmChain() *AlgorithmChain {
return &AlgorithmChain{}
}

// WeightingRelation method is a combined algorithm of text rank and word
// intensity it weights and normalizes a phrase. It always retrieves a float
// number between 0.00 and 1.00.
func (a *AlgorithmMixed) WeightingRelation(
// occurrence, it weights a phrase.
func (a *AlgorithmChain) WeightingRelation(
word1ID int,
word2ID int,
rank *Rank,
) float32 {
relationQty := rank.Relation.Node[word1ID][word2ID].Qty
word1Qty := rank.Words[word1ID].Qty
word2Qty := rank.Words[word2ID].Qty

l := false
if rank.Words[word1ID].Token == "extension" && rank.Words[word2ID].Token == "gnome" {
fmt.Println("run")
l = true
}

logging := func(word1ID int, word2ID int) {
if l {
fmt.Println(rank.Words[word1ID].Token + " - " + rank.Words[word2ID].Token)
}
}

qty := 0;

for otherW2ID := range rank.Words[word1ID].ConnectionRight {
if otherW2ID != word2ID {
if v, ok := rank.Relation.Node[word1ID][otherW2ID]; ok {
qty += v.Qty
logging(word1ID, otherW2ID)
} else if v, ok := rank.Relation.Node[otherW2ID][word1ID]; ok {
logging(otherW2ID, word1ID)
qty += v.Qty
}
}
}
qDiff := float32(math.Abs(float64(word1Qty)-float64(word2Qty))) / 100
weight := float32(relationQty) + qDiff

for otherW2ID := range rank.Words[word1ID].ConnectionLeft {
if otherW2ID != word2ID {
if v, ok := rank.Relation.Node[word1ID][otherW2ID]; ok {
qty += v.Qty
logging(word1ID, otherW2ID)
} else if v, ok := rank.Relation.Node[otherW2ID][word1ID]; ok {
qty += v.Qty
logging(otherW2ID, word1ID)
}
}
}

for otherW1ID := range rank.Words[word2ID].ConnectionRight {
if otherW1ID != word1ID {
if v, ok := rank.Relation.Node[word2ID][otherW1ID]; ok {
qty += v.Qty
logging(word2ID, otherW1ID)
} else if v, ok := rank.Relation.Node[otherW1ID][word2ID]; ok {
qty += v.Qty
logging(otherW1ID, word2ID)
}
}
}

for otherW1ID := range rank.Words[word2ID].ConnectionLeft {
if otherW1ID != word1ID {
if v, ok := rank.Relation.Node[word2ID][otherW1ID]; ok {
qty += v.Qty
logging(word2ID, otherW1ID)
} else if v, ok := rank.Relation.Node[otherW1ID][word2ID]; ok {
qty += v.Qty
logging(otherW1ID, word2ID)
}
}
}

if math.IsNaN(float64(relationQty)) {
if math.IsNaN(float64(weight)) {
return 0
}

//@todo word count?
return float32(relationQty) + (float32(qty)/100)
return weight
}

// WeightingHits method ranks the words by their number of usage. It always
// retrieves a float number between 0.00 and 1.00.
func (a *AlgorithmMixed) WeightingHits(
// WeightingHits method ranks the words by their occurrence.
func (a *AlgorithmChain) WeightingHits(
wordID int,
rank *Rank,
) float32 {
weight := rank.Words[wordID].Qty
word := rank.Words[wordID]
qty := 0

for leftWordID, leftWordQty := range word.ConnectionLeft {
qty += rank.Words[leftWordID].Qty * leftWordQty
}

for rightWordID, rightWordQty := range word.ConnectionRight {
qty += rank.Words[rightWordID].Qty * rightWordQty
}

weight := float32(word.Qty) + (float32(qty))

if math.IsNaN(float64(weight)) {
return 0
Expand Down
53 changes: 37 additions & 16 deletions rank/algorithm_test.go
Original file line number Diff line number Diff line change
@@ -1,40 +1,61 @@
package rank
/*

import (
"testing"

"github.com/stretchr/testify/assert"
)

func TestWeightingRelation(t *testing.T) {
rank := createRank()
def := NewAlgorithmDefault()
weightDef := def.WeightingRelation(1, 2, 5, 1, 10, 2, 2, 1, 15)
weightDef := def.WeightingRelation(0, 1, rank)

assert.Equal(t, float32(0.44444445), weightDef)
assert.Equal(t, float32(2), weightDef)

mix := NewAlgorithmMixed()
weightMix := mix.WeightingRelation(1, 2, 5, 1, 10, 2, 2, 1, 15)
chain := NewAlgorithmChain()
weightChain := chain.WeightingRelation(0, 1, rank)

assert.Equal(t, float32(0.2173913), weightMix)
assert.Equal(t, float32(2.01), weightChain)

weightMix = mix.WeightingRelation(1, 2, 1, 1, 1, 1, 1, 1, 1)
weightChain = chain.WeightingRelation(2, 3, rank)

assert.Equal(t, float32(0), weightMix)
assert.Equal(t, float32(1), weightChain)
}

func TestWeightingHits(t *testing.T) {
rank := createRank()

def := NewAlgorithmDefault()
weightDef := def.WeightingHits(1, 5, 1, 10)
weightDef := def.WeightingHits(0, rank)

assert.Equal(t, float32(2), weightDef)

chain := NewAlgorithmChain()
weightChain := chain.WeightingHits(0, rank)

assert.Equal(t, float32(0.44444445), weightDef)
assert.Equal(t, float32(3), weightChain)

weightChain = chain.WeightingHits(2, rank)

assert.Equal(t, float32(3), weightChain)
}

mix := NewAlgorithmMixed()
weightMix := mix.WeightingHits(1, 5, 1, 10)
func createRank() *Rank {
rank := NewRank()
rank.AddNewWord("word1", -1, 0)
rank.AddNewWord("word2", 0, 0)
rank.UpdateWord("word1", 1, 0)
rank.AddNewWord("word3", 0, 0)
rank.AddNewWord("word4", 2, 0)

assert.Equal(t, float32(0.44444445), weightMix)
rank.Relation.AddRelation(0, 1, 0)
rank.Relation.AddRelation(1, 0, 0)
rank.Relation.AddRelation(0, 2, 0)
rank.Relation.AddRelation(2, 3, 0)

weightMix = mix.WeightingHits(1, 1, 1, 1)
rank.Relation.Max = 3
rank.Relation.Min = 1

assert.Equal(t, float32(0), weightMix)
return rank
}
*/
6 changes: 3 additions & 3 deletions textrank.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,13 @@ func NewDefaultAlgorithm() *rank.AlgorithmDefault {
return rank.NewAlgorithmDefault()
}

// NewMixedAlgorithm function retrieves an Algorithm object. It defines how
// NewChainAlgorithm function retrieves an Algorithm object. It defines how
// the text ranking algorithm, the weighting, should work. This is an
// alternative way of ranking words by weighting the number of the words.
// Because Algorithm is an interface, it's possible to modify the ranking
// algorithm by injecting a different implementation. This is the 4th step to
// use TextRank.
func NewMixedAlgorithm() *rank.AlgorithmMixed {
return rank.NewAlgorithmMixed()
func NewChainAlgorithm() *rank.AlgorithmChain {
return rank.NewAlgorithmChain()
}

// Populate method adds a raw text to the text-ranking graph. It parses,
Expand Down
36 changes: 11 additions & 25 deletions textrank_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,11 @@ import (

"github.com/DavidBelicza/TextRank/rank"
"github.com/stretchr/testify/assert"
"fmt"
)

func TestOnSingleThread(t *testing.T) {
rawText := "Over the past fortnight we asked you to nominate your top extensions for the GNOME desktop. And you did just that. Having now sifted through the hundreds of entries, we’re ready to reveal your favourite GNOME Shell extensions. GNOME 3 (which is more commonly used with the GNOME Shell) has an extension framework that lets developers (and users) extend, build on, and shape how the desktop looks, acts and functions. Dash to Dock takes the GNOME Dash — this is the ‘favourites bar’ that appears on the left-hand side of the screen in the Activities overlay — and transforms it into a desktop dock. And just like Plank, Docky or AWN you can add app launchers, rearrange them, and use them to minimise, restore and switch between app windows. Dash to Dock has many of the common “Dock” features you’d expect, including autohide and intellihide, a fixed-width mode, adjustable icon size, and custom themes. My biggest pet peeve with GNOME Shell is its legacy app tray that hides in the bottom left of the screen. All extraneous non-system applets, indicators and tray icons hide down here. This makes it a little harder to use applications that rely on a system tray presence, like Skype, Franz, Telegram, and Dropbox. TopIcons Plus is the quick way to put GNOME system tray icons back where they belong: on show and in reach. The extension moves legacy tray icons from the bottom left of Gnome Shell to the right-hand side of the top panel. A well-stocked settings panel lets you adjust icon opacity, color, padding, size and tray position. Dive into the settings to adjust the sizing, styling and positioning of icons. Like the popular daily stimulant of choice, the Caffeine GNOME extension keeps your computer awake. It couldn’t be simpler to use: just click the empty mug icon. An empty cup means you’re using normal auto suspend rules – e.g., a screensaver – while a freshly brewed cup of coffee means auto suspend and screensaver are turned off. 
The Caffeine GNOME extension supports GNOME Shell 3.4 or later. Familiar with applications like Guake and Tilda? If so, you’ll instantly see the appeal of the (superbly named) Drop Down Terminal GNOME extension. When installed just tap the key above the tab key (though it can be changed to almost any key you wish) to get instant access to the command line. Want to speed up using workspaces? This simple tool lets you do just that. Once installed you can quickly switch between workspaces by scrolling over the top panel - no need to enter the Activities Overlay!"

tr0 := NewTextRank()
tr0.Populate(rawText, NewDefaultLanguage(), NewDefaultRule())
tr0.Ranking(NewMixedAlgorithm())

fmt.Println(FindPhrases(tr0))


tr := NewTextRank()
rule := NewDefaultRule()
language := NewDefaultLanguage()
Expand All @@ -29,10 +21,10 @@ func TestOnSingleThread(t *testing.T) {

assertTheGnomeTestTextDefault(t, tr)

//algorithmMix := NewMixedAlgorithm()
//tr.Ranking(algorithmMix)
algorithmChain := NewChainAlgorithm()
tr.Ranking(algorithmChain)

//assertTheGnomeTestTextMix(t, tr)
assertTheGnomeTestTextChain(t, tr)
}

func TestOnMultiThread(t *testing.T) {
Expand Down Expand Up @@ -170,23 +162,17 @@ func assertTheGnomeTestTextDefault(t *testing.T, textRank *TextRank) {
assert.Equal(t, foundSentences[0].Value, rankForCheck.SentenceMap[foundSentences[0].ID])
}

func assertTheGnomeTestTextMix(t *testing.T, textRank *TextRank) {
func assertTheGnomeTestTextChain(t *testing.T, textRank *TextRank) {
mostPopulars := []string{
"gnome shell",
"extension gnome",
"icons tray",
"gnome caffeine",
"gnome commonly",
"gnome favourite",
"gnome terminal",
"gnome way",
"gnome tray",
"gnome extensions",
"gnome supports",
"gnome used",
"gnome dash",
"gnome desktop",
"gnome takes",
"gnome left",
"gnome peeve",
"key tab",
"key changed",
"overlay activities",
"auto suspend",
"dock dash",
}

phrases := FindPhrases(textRank)
Expand Down

0 comments on commit f637912

Please sign in to comment.