Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for linguist-detectable and linguist-documentation #29267

Merged
merged 3 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Add support for linguist-detectable and linguist-documentation.
  • Loading branch information
KN4CK3R committed Feb 19, 2024
commit 9bf68d1a752a07eae9e8c877061a2526848f9400
2 changes: 1 addition & 1 deletion modules/git/repo_attribute.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ func (repo *Repository) CheckAttributeReader(commitID string) (*CheckAttributeRe
}

checker := &CheckAttributeReader{
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language", "linguist-documentation", "linguist-detectable"},
Repo: repo,
IndexFile: indexFilename,
WorkTree: worktree,
Expand Down
22 changes: 22 additions & 0 deletions modules/git/repo_language_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,35 @@ package git
import (
"strings"
"unicode"

"code.gitea.io/gitea/modules/optional"
)

const (
fileSizeLimit int64 = 16 * 1024 // 16 KiB
bigFileSize int64 = 1024 * 1024 // 1 MiB
)

// true if "set"/"true", false if "unset"/"false", none otherwise
func linguistToBool(attr map[string]string, name string) optional.Option[bool] {
KN4CK3R marked this conversation as resolved.
Show resolved Hide resolved
if value, has := attr[name]; has && value != "unspecified" {
switch value {
case "set", "true":
return optional.Some(true)
case "unset", "false":
return optional.Some(false)
}
}
return optional.None[bool]()
}

// linguistToString looks up the gitattributes value stored under name in attr.
//
// It returns None when the attribute is absent or reported as "unspecified";
// any other value, including the empty string, is returned wrapped in Some.
func linguistToString(attr map[string]string, name string) optional.Option[string] {
	value, has := attr[name]
	if !has || value == "unspecified" {
		return optional.None[string]()
	}
	return optional.Some(value)
}

// mergeLanguageStats merges language names with different cases. The name with the most upper case letters is used.
func mergeLanguageStats(stats map[string]int64) map[string]int64 {
names := map[string]struct {
Expand Down
75 changes: 41 additions & 34 deletions modules/git/repo_language_stats_gogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"strings"
silverwind marked this conversation as resolved.
Show resolved Hide resolved

"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/optional"

"github.com/go-enry/go-enry/v2"
"github.com/go-git/go-git/v5"
Expand Down Expand Up @@ -57,25 +58,47 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil
}

notVendored := false
notGenerated := false
isVendored := optional.None[bool]()
isGenerated := optional.None[bool]()
isDocumentation := optional.None[bool]()
isDetectable := optional.None[bool]()

if checker != nil {
attrs, err := checker.CheckPath(f.Name)
if err == nil {
if vendored, has := attrs["linguist-vendored"]; has {
if vendored == "set" || vendored == "true" {
return nil
}
notVendored = vendored == "false"
isVendored = linguistToBool(attrs, "linguist-vendored")
if isVendored.ValueOrDefault(false) {
return nil
}

isGenerated = linguistToBool(attrs, "linguist-generated")
if isGenerated.ValueOrDefault(false) {
return nil
}
if generated, has := attrs["linguist-generated"]; has {
if generated == "set" || generated == "true" {
return nil

isDocumentation = linguistToBool(attrs, "linguist-documentation")
if isDocumentation.ValueOrDefault(false) {
return nil
}

isDetectable = linguistToBool(attrs, "linguist-detectable")
if !isDetectable.ValueOrDefault(true) {
return nil
}

hasLanguage := linguistToString(attrs, "linguist-language")
if hasLanguage.Value() == "" {
hasLanguage = linguistToString(attrs, "gitlab-language")
if hasLanguage.Has() {
language := hasLanguage.Value()
if idx := strings.IndexByte(language, '?'); idx >= 0 {
hasLanguage = optional.Some(language[:idx])
}
}
notGenerated = generated == "false"
}
if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
if hasLanguage.Value() != "" {
language := hasLanguage.Value()

// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) != 0 {
Expand All @@ -85,28 +108,14 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
// this language will always be added to the size
sizes[language] += f.Size
return nil
} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
// strip off a ? if present
if idx := strings.IndexByte(language, '?'); idx >= 0 {
language = language[:idx]
}
if len(language) != 0 {
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) != 0 {
language = group
}

// this language will always be added to the size
sizes[language] += f.Size
return nil
}
}
}
}

if (!notVendored && analyze.IsVendor(f.Name)) || enry.IsDotFile(f.Name) ||
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
if (!isVendored.Has() && analyze.IsVendor(f.Name)) ||
enry.IsDotFile(f.Name) ||
(!isDocumentation.Has() && enry.IsDocumentation(f.Name)) ||
enry.IsConfiguration(f.Name) {
return nil
}

Expand All @@ -115,12 +124,10 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
if f.Size <= bigFileSize {
content, _ = readFile(f, fileSizeLimit)
}
if !notGenerated && enry.IsGenerated(f.Name, content) {
if !isGenerated.Has() && enry.IsGenerated(f.Name, content) {
return nil
}

// TODO: Use .gitattributes file for linguist overrides

language := analyze.GetCodeLanguage(f.Name, content)
if language == enry.OtherLanguage || language == "" {
return nil
Expand All @@ -138,7 +145,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
included = langtype == enry.Programming || langtype == enry.Markup
includedLanguage[language] = included
}
if included {
if included || isDetectable.ValueOrDefault(false) {
sizes[language] += f.Size
} else if len(sizes) == 0 && (firstExcludedLanguage == "" || firstExcludedLanguage == language) {
firstExcludedLanguage = language
Expand Down
75 changes: 41 additions & 34 deletions modules/git/repo_language_stats_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (

"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/optional"

"github.com/go-enry/go-enry/v2"
)
Expand Down Expand Up @@ -90,25 +91,47 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
continue
}

notVendored := false
notGenerated := false
isVendored := optional.None[bool]()
isGenerated := optional.None[bool]()
isDocumentation := optional.None[bool]()
isDetectable := optional.None[bool]()

if checker != nil {
attrs, err := checker.CheckPath(f.Name())
if err == nil {
if vendored, has := attrs["linguist-vendored"]; has {
if vendored == "set" || vendored == "true" {
continue
}
notVendored = vendored == "false"
isVendored = linguistToBool(attrs, "linguist-vendored")
if isVendored.ValueOrDefault(false) {
continue
}

isGenerated = linguistToBool(attrs, "linguist-generated")
if isGenerated.ValueOrDefault(false) {
continue
}
if generated, has := attrs["linguist-generated"]; has {
if generated == "set" || generated == "true" {
continue

isDocumentation = linguistToBool(attrs, "linguist-documentation")
if isDocumentation.ValueOrDefault(false) {
continue
}

isDetectable = linguistToBool(attrs, "linguist-detectable")
if !isDetectable.ValueOrDefault(true) {
continue
}

hasLanguage := linguistToString(attrs, "linguist-language")
if hasLanguage.Value() == "" {
hasLanguage = linguistToString(attrs, "gitlab-language")
if hasLanguage.Has() {
language := hasLanguage.Value()
if idx := strings.IndexByte(language, '?'); idx >= 0 {
hasLanguage = optional.Some(language[:idx])
}
}
notGenerated = generated == "false"
}
if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
if hasLanguage.Value() != "" {
language := hasLanguage.Value()

// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) != 0 {
Expand All @@ -118,29 +141,14 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
// this language will always be added to the size
sizes[language] += f.Size()
continue
} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
// strip off a ? if present
if idx := strings.IndexByte(language, '?'); idx >= 0 {
language = language[:idx]
}
if len(language) != 0 {
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) != 0 {
language = group
}

// this language will always be added to the size
sizes[language] += f.Size()
continue
}
}

}
}

if (!notVendored && analyze.IsVendor(f.Name())) || enry.IsDotFile(f.Name()) ||
enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) {
if (!isVendored.Has() && analyze.IsVendor(f.Name())) ||
enry.IsDotFile(f.Name()) ||
(!isDocumentation.Has() && enry.IsDocumentation(f.Name())) ||
enry.IsConfiguration(f.Name()) {
continue
}

Expand Down Expand Up @@ -173,7 +181,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err
}
}
if !notGenerated && enry.IsGenerated(f.Name(), content) {
if !isGenerated.Has() && enry.IsGenerated(f.Name(), content) {
continue
}

Expand All @@ -196,13 +204,12 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
included = langType == enry.Programming || langType == enry.Markup
includedLanguage[language] = included
}
if included {
if included || isDetectable.ValueOrDefault(false) {
sizes[language] += f.Size()
} else if len(sizes) == 0 && (firstExcludedLanguage == "" || firstExcludedLanguage == language) {
firstExcludedLanguage = language
firstExcludedLanguageSize += f.Size()
}
continue
}

// If there are no included languages add the first excluded language
Expand Down
Loading
Loading