Refactored algorithms @todo docs

DavidBelicza · Mar 8, 2018 · f637912 · f637912
1 parent ae9d20a
commit f637912
Show file tree

Hide file tree

Showing 6 changed files with 87 additions and 132 deletions.
diff --git a/README.md b/README.md
@@ -254,7 +254,7 @@ func main() {
  // Default Language for filtering stop words.
  language := textrank.NewDefaultLanguage()
  // Use a slightly more complex algorithm to rank the text.
- algorithmMix := textrank.NewMixedAlgorithm()
+ algorithmMix := textrank.NewChainAlgorithm()
 
  // Add text.
  tr.Populate(rawText, language, rule)
@@ -304,7 +304,7 @@ func main() {
  tr2 := textrank.NewTextRank()
 
  // Use a slightly more complex algorithm to rank the text.
- algorithmMix := textrank.NewMixedAlgorithm()
+ algorithmMix := textrank.NewChainAlgorithm()
 
  // Add text to the second graph.
  tr2.Populate(rawText, language, rule)

diff --git a/doc.go b/doc.go
@@ -245,7 +245,7 @@ algorithm by Algorithm interface and use it instead of defaults.
  // Default Language for filtering stop words.
  language := textrank.NewDefaultLanguage()
  // Use a slightly more complex algorithm to rank the text.
- algorithmMix := textrank.NewMixedAlgorithm()
+ algorithmMix := textrank.NewChainAlgorithm()
 
  // Add text.
  tr.Populate(rawText, language, rule)
@@ -294,7 +294,7 @@ processes.
  tr2 := textrank.NewTextRank()
 
  // Use a slightly more complex algorithm to rank the text.
- algorithmMix := textrank.NewMixedAlgorithm()
+ algorithmMix := textrank.NewChainAlgorithm()
 
  // Add text to the second graph.
  tr2.Populate(rawText, language, rule)

diff --git a/rank/algorithm.go b/rank/algorithm.go
@@ -2,7 +2,6 @@ package rank
 
 import (
  "math"
- "fmt"
 )
 
 // Algorithm interface and its methods make possible the polymorphic usage of
@@ -21,7 +20,7 @@ type Algorithm interface {
 }
 
 // AlgorithmDefault struct is the basic implementation of Algorithm. It can
-// weight and normalize a word or phrase by comparing them.
+// weight a word or phrase by comparing them.
 type AlgorithmDefault struct{}
 
 // NewAlgorithmDefault constructor retrieves an AlgorithmDefault pointer.
@@ -30,8 +29,7 @@ func NewAlgorithmDefault() *AlgorithmDefault {
 }
 
 // WeightingRelation method is the traditional algorithm of text rank to
-// weighting and normalizing a phrase. It always retrieves a float number
-// between 0.00 and 1.00.
+// weighting a phrase.
 func (a *AlgorithmDefault) WeightingRelation(
  word1ID int,
  word2ID int,
@@ -46,8 +44,7 @@ func (a *AlgorithmDefault) WeightingRelation(
  return float32(relationQty)
 }
 
-// WeightingHits method ranks the words by their number of usage. It always
-// retrieves a float number between 0.00 and 1.00.
+// WeightingHits method ranks the words by their occurrence.
 func (a *AlgorithmDefault) WeightingHits(
  wordID int,
  rank *Rank,
@@ -61,103 +58,54 @@ func (a *AlgorithmDefault) WeightingHits(
  return float32(weight)
 }
 
-// AlgorithmMixed struct is the combined implementation of Algorithm. A good
-// example how weighting can be changed by a different implementations. It can
-// weight and normalize a word or phrase by comparing them.
-type AlgorithmMixed struct{}
+// AlgorithmChain struct is the combined implementation of Algorithm. It is a
+// good example of how weighting can be changed by different implementations.
+// It can weight a word or phrase by comparing them.
+type AlgorithmChain struct{}
 
-// NewAlgorithmMixed constructor retrieves an AlgorithmMixed pointer.
-func NewAlgorithmMixed() *AlgorithmMixed {
- return &AlgorithmMixed{}
+// NewAlgorithmChain constructor retrieves an AlgorithmChain pointer.
+func NewAlgorithmChain() *AlgorithmChain {
+ return &AlgorithmChain{}
 }
 
 // WeightingRelation method is a combined algorithm of text rank and word
-// intensity it weights and normalizes a phrase. It always retrieves a float
-// number between 0.00 and 1.00.
-func (a *AlgorithmMixed) WeightingRelation(
+// occurrence, it weights a phrase.
+func (a *AlgorithmChain) WeightingRelation(
  word1ID int,
  word2ID int,
  rank *Rank,
 ) float32 {
  relationQty := rank.Relation.Node[word1ID][word2ID].Qty
+ word1Qty := rank.Words[word1ID].Qty
+ word2Qty := rank.Words[word2ID].Qty
 
- l := false
- if rank.Words[word1ID].Token == "extension" && rank.Words[word2ID].Token == "gnome" {
- fmt.Println("run")
- l = true
- }
-
- logging := func(word1ID int, word2ID int) {
- if l {
- fmt.Println(rank.Words[word1ID].Token + " - " + rank.Words[word2ID].Token)
- }
- }
-
- qty := 0;
-
- for otherW2ID := range rank.Words[word1ID].ConnectionRight {
- if otherW2ID != word2ID {
- if v, ok := rank.Relation.Node[word1ID][otherW2ID]; ok {
- qty += v.Qty
- logging(word1ID, otherW2ID)
- } else if v, ok := rank.Relation.Node[otherW2ID][word1ID]; ok {
- logging(otherW2ID, word1ID)
- qty += v.Qty
- }
- }
- }
+ qDiff := float32(math.Abs(float64(word1Qty)-float64(word2Qty))) / 100
+ weight := float32(relationQty) + qDiff
 
- for otherW2ID := range rank.Words[word1ID].ConnectionLeft {
- if otherW2ID != word2ID {
- if v, ok := rank.Relation.Node[word1ID][otherW2ID]; ok {
- qty += v.Qty
- logging(word1ID, otherW2ID)
- } else if v, ok := rank.Relation.Node[otherW2ID][word1ID]; ok {
- qty += v.Qty
- logging(otherW2ID, word1ID)
- }
- }
- }
-
- for otherW1ID := range rank.Words[word2ID].ConnectionRight {
- if otherW1ID != word1ID {
- if v, ok := rank.Relation.Node[word2ID][otherW1ID]; ok {
- qty += v.Qty
- logging(word2ID, otherW1ID)
- } else if v, ok := rank.Relation.Node[otherW1ID][word2ID]; ok {
- qty += v.Qty
- logging(otherW1ID, word2ID)
- }
- }
- }
-
- for otherW1ID := range rank.Words[word2ID].ConnectionLeft {
- if otherW1ID != word1ID {
- if v, ok := rank.Relation.Node[word2ID][otherW1ID]; ok {
- qty += v.Qty
- logging(word2ID, otherW1ID)
- } else if v, ok := rank.Relation.Node[otherW1ID][word2ID]; ok {
- qty += v.Qty
- logging(otherW1ID, word2ID)
- }
- }
- }
-
- if math.IsNaN(float64(relationQty)) {
+ if math.IsNaN(float64(weight)) {
  return 0
  }
 
- //@todo word count?
- return float32(relationQty) + (float32(qty)/100)
+ return weight
 }
 
-// WeightingHits method ranks the words by their number of usage. It always
-// retrieves a float number between 0.00 and 1.00.
-func (a *AlgorithmMixed) WeightingHits(
+// WeightingHits method ranks the words by their occurrence.
+func (a *AlgorithmChain) WeightingHits(
  wordID int,
  rank *Rank,
 ) float32 {
- weight := rank.Words[wordID].Qty
+ word := rank.Words[wordID]
+ qty := 0
+
+ for leftWordID, leftWordQty := range word.ConnectionLeft {
+ qty += rank.Words[leftWordID].Qty * leftWordQty
+ }
+
+ for rightWordID, rightWordQty := range word.ConnectionRight {
+ qty += rank.Words[rightWordID].Qty * rightWordQty
+ }
+
+ weight := float32(word.Qty) + (float32(qty))
 
  if math.IsNaN(float64(weight)) {
  return 0

diff --git a/rank/algorithm_test.go b/rank/algorithm_test.go
@@ -1,40 +1,61 @@
 package rank
-/*
+
 import (
  "testing"
 
  "github.com/stretchr/testify/assert"
 )
 
 func TestWeightingRelation(t *testing.T) {
+ rank := createRank()
  def := NewAlgorithmDefault()
- weightDef := def.WeightingRelation(1, 2, 5, 1, 10, 2, 2, 1, 15)
+ weightDef := def.WeightingRelation(0, 1, rank)
 
- assert.Equal(t, float32(0.44444445), weightDef)
+ assert.Equal(t, float32(2), weightDef)
 
- mix := NewAlgorithmMixed()
- weightMix := mix.WeightingRelation(1, 2, 5, 1, 10, 2, 2, 1, 15)
+ chain := NewAlgorithmChain()
+ weightChain := chain.WeightingRelation(0, 1, rank)
 
- assert.Equal(t, float32(0.2173913), weightMix)
+ assert.Equal(t, float32(2.01), weightChain)
 
- weightMix = mix.WeightingRelation(1, 2, 1, 1, 1, 1, 1, 1, 1)
+ weightChain = chain.WeightingRelation(2, 3, rank)
 
- assert.Equal(t, float32(0), weightMix)
+ assert.Equal(t, float32(1), weightChain)
 }
 
 func TestWeightingHits(t *testing.T) {
+ rank := createRank()
+
  def := NewAlgorithmDefault()
- weightDef := def.WeightingHits(1, 5, 1, 10)
+ weightDef := def.WeightingHits(0, rank)
+
+ assert.Equal(t, float32(2), weightDef)
+
+ chain := NewAlgorithmChain()
+ weightChain := chain.WeightingHits(0, rank)
 
- assert.Equal(t, float32(0.44444445), weightDef)
+ assert.Equal(t, float32(3), weightChain)
+
+ weightChain = chain.WeightingHits(2, rank)
+
+ assert.Equal(t, float32(3), weightChain)
+}
 
- mix := NewAlgorithmMixed()
- weightMix := mix.WeightingHits(1, 5, 1, 10)
+func createRank() *Rank {
+ rank := NewRank()
+ rank.AddNewWord("word1", -1, 0)
+ rank.AddNewWord("word2", 0, 0)
+ rank.UpdateWord("word1", 1, 0)
+ rank.AddNewWord("word3", 0, 0)
+ rank.AddNewWord("word4", 2, 0)
 
- assert.Equal(t, float32(0.44444445), weightMix)
+ rank.Relation.AddRelation(0, 1, 0)
+ rank.Relation.AddRelation(1, 0, 0)
+ rank.Relation.AddRelation(0, 2, 0)
+ rank.Relation.AddRelation(2, 3, 0)
 
- weightMix = mix.WeightingHits(1, 1, 1, 1)
+ rank.Relation.Max = 3
+ rank.Relation.Min = 1
 
- assert.Equal(t, float32(0), weightMix)
+ return rank
 }
-*/
diff --git a/textrank.go b/textrank.go
@@ -49,13 +49,13 @@ func NewDefaultAlgorithm() *rank.AlgorithmDefault {
  return rank.NewAlgorithmDefault()
 }
 
-// NewMixedAlgorithm function retrieves an Algorithm object. It defines how
+// NewChainAlgorithm function retrieves an Algorithm object. It defines how
 // the text ranking algorithm should work, i.e. the weighting. This is an
 // alternative way of ranking words by weighting the number of the words. Because
 // Algorithm is an interface, it's possible to modify the ranking algorithm by
 // injecting a different implementation. This is the 4th step to use TextRank.
-func NewMixedAlgorithm() *rank.AlgorithmMixed {
- return rank.NewAlgorithmMixed()
+func NewChainAlgorithm() *rank.AlgorithmChain {
+ return rank.NewAlgorithmChain()
 }
 
 // Populate method adds a raw text to the text-ranking graph. It parses,

diff --git a/textrank_test.go b/textrank_test.go
@@ -6,19 +6,11 @@ import (
 
  "github.com/DavidBelicza/TextRank/rank"
  "github.com/stretchr/testify/assert"
- "fmt"
 )
 
 func TestOnSingleThread(t *testing.T) {
  rawText := "Over the past fortnight we asked you to nominate your top extensions for the GNOME desktop. And you did just that. Having now sifted through the hundreds of entries, we’re ready to reveal your favourite GNOME Shell extensions. GNOME 3 (which is more commonly used with the GNOME Shell) has an extension framework that lets developers (and users) extend, build on, and shape how the desktop looks, acts and functions. Dash to Dock takes the GNOME Dash — this is the ‘favourites bar’ that appears on the left-hand side of the screen in the Activities overlay — and transforms it into a desktop dock. And just like Plank, Docky or AWN you can add app launchers, rearrange them, and use them to minimise, restore and switch between app windows. Dash to Dock has many of the common “Dock” features you’d expect, including autohide and intellihide, a fixed-width mode, adjustable icon size, and custom themes. My biggest pet peeve with GNOME Shell is its legacy app tray that hides in the bottom left of the screen. All extraneous non-system applets, indicators and tray icons hide down here. This makes it a little harder to use applications that rely on a system tray presence, like Skype, Franz, Telegram, and Dropbox. TopIcons Plus is the quick way to put GNOME system tray icons back where they belong: on show and in reach. The extension moves legacy tray icons from the bottom left of Gnome Shell to the right-hand side of the top panel. A well-stocked settings panel lets you adjust icon opacity, color, padding, size and tray position. Dive into the settings to adjust the sizing, styling and positioning of icons. Like the popular daily stimulant of choice, the Caffeine GNOME extension keeps your computer awake. It couldn’t be simpler to use: just click the empty mug icon. An empty cup means you’re using normal auto suspend rules – e.g., a screensaver – while a freshly brewed cup of coffee means auto suspend and screensaver are turned off. 
The Caffeine GNOME extension supports GNOME Shell 3.4 or later. Familiar with applications like Guake and Tilda? If so, you’ll instantly see the appeal of the (superbly named) Drop Down Terminal GNOME extension. When installed just tap the key above the tab key (though it can be changed to almost any key you wish) to get instant access to the command line. Want to speed up using workspaces? This simple tool lets you do just that. Once installed you can quickly switch between workspaces by scrolling over the top panel - no need to enter the Activities Overlay!"
 
- tr0 := NewTextRank()
- tr0.Populate(rawText, NewDefaultLanguage(), NewDefaultRule())
- tr0.Ranking(NewMixedAlgorithm())
-
- fmt.Println(FindPhrases(tr0))
-
-
  tr := NewTextRank()
  rule := NewDefaultRule()
  language := NewDefaultLanguage()
@@ -29,10 +21,10 @@ func TestOnSingleThread(t *testing.T) {
 
  assertTheGnomeTestTextDefault(t, tr)
 
- //algorithmMix := NewMixedAlgorithm()
- //tr.Ranking(algorithmMix)
+ algorithmChain := NewChainAlgorithm()
+ tr.Ranking(algorithmChain)
 
- //assertTheGnomeTestTextMix(t, tr)
+ assertTheGnomeTestTextChain(t, tr)
 }
 
 func TestOnMultiThread(t *testing.T) {
@@ -170,23 +162,17 @@ func assertTheGnomeTestTextDefault(t *testing.T, textRank *TextRank) {
  assert.Equal(t, foundSentences[0].Value, rankForCheck.SentenceMap[foundSentences[0].ID])
 }
 
-func assertTheGnomeTestTextMix(t *testing.T, textRank *TextRank) {
+func assertTheGnomeTestTextChain(t *testing.T, textRank *TextRank) {
  mostPopulars := []string{
  "gnome shell",
+ "extension gnome",
+ "icons tray",
  "gnome caffeine",
- "gnome commonly",
- "gnome favourite",
- "gnome terminal",
- "gnome way",
- "gnome tray",
- "gnome extensions",
- "gnome supports",
- "gnome used",
- "gnome dash",
- "gnome desktop",
- "gnome takes",
- "gnome left",
- "gnome peeve",
+ "key tab",
+ "key changed",
+ "overlay activities",
+ "auto suspend",
+ "dock dash",
  }
 
  phrases := FindPhrases(textRank)