Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change language statistics to save size instead of percentage #11681

Merged
merged 4 commits into from
May 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions models/migrations/migrations.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,8 @@ var migrations = []Migration{
NewMigration("Add ResolveDoerID to Comment table", addResolveDoerIDCommentColumn),
// v139 -> v140
NewMigration("prepend refs/heads/ to issue refs", prependRefsHeadsToIssueRefs),
// v140 -> v141
NewMigration("Save detected language file size to database instead of percent", fixLanguageStatsToSaveSize),
}

// GetCurrentDBVersion returns the current db version
Expand Down
56 changes: 56 additions & 0 deletions models/migrations/v140.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright 2020 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package migrations

import (
"fmt"

"code.gitea.io/gitea/modules/setting"

"xorm.io/xorm"
)

func fixLanguageStatsToSaveSize(x *xorm.Engine) error {
// LanguageStat see models/repo_language_stats.go
type LanguageStat struct {
Size int64 `xorm:"NOT NULL DEFAULT 0"`
}

// RepoIndexerType specifies the repository indexer type
type RepoIndexerType int

const (
// RepoIndexerTypeCode code indexer
RepoIndexerTypeCode RepoIndexerType = iota // 0
// RepoIndexerTypeStats repository stats indexer
RepoIndexerTypeStats // 1
)

// RepoIndexerStatus see models/repo_indexer.go
type RepoIndexerStatus struct {
IndexerType RepoIndexerType `xorm:"INDEX(s) NOT NULL DEFAULT 0"`
}

if err := x.Sync2(new(LanguageStat)); err != nil {
return fmt.Errorf("Sync2: %v", err)
}

x.Delete(&RepoIndexerStatus{IndexerType: RepoIndexerTypeStats})

// Delete language stat statuses
truncExpr := "TRUNCATE TABLE"
if setting.Database.UseSQLite3 {
truncExpr = "DELETE FROM"
}

// Delete language stats
if _, err := x.Exec(fmt.Sprintf("%s language_stat", truncExpr)); err != nil {
return err
}

sess := x.NewSession()
defer sess.Close()
return dropTableColumns(sess, "language_stat", "percentage")
}
100 changes: 82 additions & 18 deletions models/repo_language_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,28 @@ type LanguageStat struct {
CommitID string
IsPrimary bool
Language string `xorm:"VARCHAR(30) UNIQUE(s) INDEX NOT NULL"`
Percentage float32 `xorm:"NUMERIC(5,2) NOT NULL DEFAULT 0"`
Percentage float32 `xorm:"-"`
Size int64 `xorm:"NOT NULL DEFAULT 0"`
Color string `xorm:"-"`
CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"`
}

// specialLanguages defines list of languages that are excluded from the calculation
// unless they are the only language present in repository. Only languages which under
// normal circumstances are not considered to be code should be listed here.
var specialLanguages = map[string]struct{}{
"XML": {},
"JSON": {},
"TOML": {},
"YAML": {},
"INI": {},
"SQL": {},
"SVG": {},
"Text": {},
"Markdown": {},
"other": {},
}

// LanguageStatList defines a list of language statistics
type LanguageStatList []*LanguageStat

Expand All @@ -34,12 +51,53 @@ func (stats LanguageStatList) loadAttributes() {
}
}

func (stats LanguageStatList) getLanguagePercentages() map[string]float32 {
langPerc := make(map[string]float32)
var otherPerc float32 = 100
var total int64
// Check that repository has at least one non-special language
var skipSpecial bool
for _, stat := range stats {
lafriks marked this conversation as resolved.
Show resolved Hide resolved
if _, ok := specialLanguages[stat.Language]; !ok {
skipSpecial = true
break
}
}
for _, stat := range stats {
// Exclude specific languages from percentage calculation
if _, ok := specialLanguages[stat.Language]; ok && skipSpecial {
continue
}
total += stat.Size
}
if total > 0 {
for _, stat := range stats {
// Exclude specific languages from percentage calculation
if _, ok := specialLanguages[stat.Language]; ok && skipSpecial {
continue
}
perc := float32(math.Round(float64(stat.Size)/float64(total)*1000) / 10)
if perc <= 0.1 {
continue
}
otherPerc -= perc
langPerc[stat.Language] = perc
}
otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
} else {
otherPerc = 100
}
if otherPerc > 0 {
langPerc["other"] = otherPerc
}
return langPerc
}

func (repo *Repository) getLanguageStats(e Engine) (LanguageStatList, error) {
stats := make(LanguageStatList, 0, 6)
if err := e.Where("`repo_id` = ?", repo.ID).Desc("`percentage`").Find(&stats); err != nil {
if err := e.Where("`repo_id` = ?", repo.ID).Desc("`size`").Find(&stats); err != nil {
return nil, err
}
stats.loadAttributes()
return stats, nil
}

Expand All @@ -54,13 +112,18 @@ func (repo *Repository) GetTopLanguageStats(limit int) (LanguageStatList, error)
if err != nil {
return nil, err
}
perc := stats.getLanguagePercentages()
topstats := make(LanguageStatList, 0, limit)
var other float32
for i := range stats {
if _, ok := perc[stats[i].Language]; !ok {
continue
}
if stats[i].Language == "other" || len(topstats) >= limit {
other += stats[i].Percentage
other += perc[stats[i].Language]
continue
}
stats[i].Percentage = perc[stats[i].Language]
topstats = append(topstats, stats[i])
}
if other > 0 {
Expand All @@ -71,11 +134,12 @@ func (repo *Repository) GetTopLanguageStats(limit int) (LanguageStatList, error)
Percentage: float32(math.Round(float64(other)*10) / 10),
})
}
topstats.loadAttributes()
return topstats, nil
}

// UpdateLanguageStats updates the language statistics for repository
func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]float32) error {
func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]int64) error {
sess := x.NewSession()
if err := sess.Begin(); err != nil {
return err
Expand All @@ -87,24 +151,24 @@ func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]fl
return err
}
var topLang string
var p float32
for lang, perc := range stats {
if perc > p {
p = perc
var s int64
for lang, size := range stats {
if size > s {
s = size
topLang = strings.ToLower(lang)
}
}

for lang, perc := range stats {
for lang, size := range stats {
upd := false
llang := strings.ToLower(lang)
for _, s := range oldstats {
// Update already existing language
if strings.ToLower(s.Language) == llang {
s.CommitID = commitID
s.IsPrimary = llang == topLang
s.Percentage = perc
if _, err := sess.ID(s.ID).Cols("`commit_id`", "`percentage`", "`is_primary`").Update(s); err != nil {
s.Size = size
if _, err := sess.ID(s.ID).Cols("`commit_id`", "`size`", "`is_primary`").Update(s); err != nil {
return err
}
upd = true
Expand All @@ -114,11 +178,11 @@ func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]fl
// Insert new language
if !upd {
if _, err := sess.Insert(&LanguageStat{
RepoID: repo.ID,
CommitID: commitID,
IsPrimary: llang == topLang,
Language: lang,
Percentage: perc,
RepoID: repo.ID,
CommitID: commitID,
IsPrimary: llang == topLang,
Language: lang,
Size: size,
}); err != nil {
return err
}
Expand Down Expand Up @@ -153,7 +217,7 @@ func CopyLanguageStat(originalRepo, destRepo *Repository) error {
return err
}
RepoLang := make(LanguageStatList, 0, 6)
if err := sess.Where("`repo_id` = ?", originalRepo.ID).Desc("`percentage`").Find(&RepoLang); err != nil {
if err := sess.Where("`repo_id` = ?", originalRepo.ID).Desc("`size`").Find(&RepoLang); err != nil {
return err
}
if len(RepoLang) > 0 {
Expand Down
25 changes: 6 additions & 19 deletions modules/git/repo_language_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (
"bytes"
"io"
"io/ioutil"
"math"

"code.gitea.io/gitea/modules/analyze"

Expand All @@ -21,7 +20,7 @@ import (
const fileSizeLimit int64 = 16 * 1024 * 1024

// GetLanguageStats calculates language stats for git repository at specified commit
func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, error) {
func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
r, err := git.PlainOpen(repo.Path)
if err != nil {
return nil, err
Expand All @@ -43,7 +42,6 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e
}

sizes := make(map[string]int64)
var total int64
err = tree.Files().ForEach(func(f *object.File) error {
if enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
Expand All @@ -60,33 +58,22 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e

language := analyze.GetCodeLanguage(f.Name, content)
if language == enry.OtherLanguage || language == "" {
return nil
language = "other"
}

sizes[language] += f.Size
total += f.Size

return nil
})
if err != nil {
return nil, err
}

stats := make(map[string]float32)
var otherPerc float32 = 100
for language, size := range sizes {
perc := float32(math.Round(float64(size)/float64(total)*1000) / 10)
if perc <= 0.1 {
continue
}
otherPerc -= perc
stats[language] = perc
if len(sizes) == 0 {
sizes["other"] = 0
}
otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
if otherPerc > 0 {
stats["other"] = otherPerc
}
return stats, nil

return sizes, nil
}

func readFile(f *object.File, limit int64) ([]byte, error) {
Expand Down
3 changes: 3 additions & 0 deletions modules/indexer/stats/indexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ func TestRepoStatsIndex(t *testing.T) {

repo, err := models.GetRepositoryByID(1)
assert.NoError(t, err)
status, err := repo.GetIndexerStatus(models.RepoIndexerTypeStats)
assert.NoError(t, err)
assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha)
langs, err := repo.GetTopLanguageStats(5)
assert.NoError(t, err)
assert.Len(t, langs, 1)
Expand Down