// Copyright 2024 The Forgejo Authors c/o Codeberg e.V.. All rights reserved. // SPDX-License-Identifier: MIT package integration import ( "context" "net/url" "strings" "testing" "time" "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/models/unittest" user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/indexer/stats" "code.gitea.io/gitea/modules/queue" repo_service "code.gitea.io/gitea/services/repository" files_service "code.gitea.io/gitea/services/repository/files" "code.gitea.io/gitea/tests" "github.com/stretchr/testify/assert" ) func createLangStatTestRepo(t *testing.T) (*repo_model.Repository, func()) { t.Helper() user2 := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2}) // Create a new repository repo, err := repo_service.CreateRepository(db.DefaultContext, user2, user2, repo_service.CreateRepoOptions{ Name: "lang-stat-test", Description: "minimal repo for language stats testing", AutoInit: true, Gitignores: "Go", License: "MIT", Readme: "Default", DefaultBranch: "main", IsPrivate: false, }) assert.NoError(t, err) assert.NotEmpty(t, repo) return repo, func() { repo_service.DeleteRepository(db.DefaultContext, user2, repo, false) } } func addLangStatTestFiles(t *testing.T, repo *repo_model.Repository, contents string) string { t.Helper() owner := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: repo.OwnerID}) addFilesResp, err := files_service.ChangeRepoFiles(git.DefaultContext, repo, owner, &files_service.ChangeRepoFilesOptions{ Files: []*files_service.ChangeRepoFile{ { Operation: "create", TreePath: ".gitattributes", ContentReader: strings.NewReader(contents), }, { Operation: "create", TreePath: "docs.md", ContentReader: strings.NewReader("This **is** a `markdown` file.\n"), }, { Operation: "create", TreePath: "foo.c", ContentReader: strings.NewReader(`#include <stdio.h>\nint main() {\n printf("Hello world!\n");\n return 0;\n}\n`), }, { Operation: "create", TreePath: "foo.nib", ContentReader: strings.NewReader("Pinky promise, this is not a generated file!\n"), }, { Operation: "create", TreePath: ".dot.pas", ContentReader: strings.NewReader("program Hello;\nbegin\n writeln('Hello, world.');\nend.\n"), }, { Operation: "create", TreePath: "cpplint.py", ContentReader: strings.NewReader(`#! /usr/bin/env python\n\nprint("Hello world!")\n`), }, { Operation: "create", TreePath: "some-file.xml", ContentReader: strings.NewReader(`<?xml version="1.0"?>\n<foo>\n <bar>Hello</bar>\n</foo>\n`), }, }, Message: "add files", OldBranch: "main", NewBranch: "main", Author: &files_service.IdentityOptions{ Name: owner.Name, Email: owner.Email, }, Committer: &files_service.IdentityOptions{ Name: owner.Name, Email: owner.Email, }, Dates: &files_service.CommitDateOptions{ Author: time.Now(), Committer: time.Now(), }, }) assert.NoError(t, err) assert.NotEmpty(t, addFilesResp) return addFilesResp.Commit.SHA } func TestRepoLangStats(t *testing.T) { onGiteaRun(t, func(t *testing.T, u *url.URL) { /****************** ** Preparations ** ******************/ prep := func(t *testing.T, attribs string) (*repo_model.Repository, string, func()) { t.Helper() repo, f := createLangStatTestRepo(t) sha := addLangStatTestFiles(t, repo, attribs) return repo, sha, f } getFreshLanguageStats := func(t *testing.T, repo *repo_model.Repository, sha string) repo_model.LanguageStatList { t.Helper() err := stats.UpdateRepoIndexer(repo) assert.NoError(t, err) assert.NoError(t, queue.GetManager().FlushAll(context.Background(), 10*time.Second)) status, err := repo_model.GetIndexerStatus(db.DefaultContext, repo, repo_model.RepoIndexerTypeStats) assert.NoError(t, err) assert.Equal(t, sha, status.CommitSha) langs, err := repo_model.GetTopLanguageStats(db.DefaultContext, repo, 5) assert.NoError(t, err) return langs } /*********** ** Tests ** ***********/ // 1. By default, documentation is not indexed t.Run("default", func(t *testing.T) { defer tests.PrintCurrentTest(t)() repo, sha, f := prep(t, "") defer f() langs := getFreshLanguageStats(t, repo, sha) // While this is a fairly short test, this exercises a number of // things: // // - `.gitattributes` is empty, so `isDetectable.IsFalse()`, // `isVendored.IsTrue()`, and `isDocumentation.IsTrue()` will be // false for every file, because these are only true if an // attribute is explicitly set. // // - There is `.dot.pas`, which would be considered Pascal source, // but it is a dotfile (thus, `enry.IsDotFile()` applies), and as // such, is not considered. // // - `some-file.xml` will be skipped because Enry considers XML // configuration, and `enry.IsConfiguration()` will catch it. // // - `!isVendored.IsFalse()` evaluates to true, so // `analyze.isVendor()` will be called on `cpplint.py`, which will // be considered vendored, even though both the filename and // contents would otherwise make it Python. // // - `!isDocumentation.IsFalse()` evaluates to true, so // `enry.IsDocumentation()` will be called for `docs.md`, and will // be considered documentation, thus, skipped. // // Thus, this exercises all of the conditions in the first big if // that is supposed to filter out files early. With two short asserts! assert.Len(t, langs, 1) assert.Equal(t, "C", langs[0].Language) }) // 2. Marking foo.c as non-detectable t.Run("foo.c non-detectable", func(t *testing.T) { defer tests.PrintCurrentTest(t)() repo, sha, f := prep(t, "foo.c linguist-detectable=false\n") defer f() langs := getFreshLanguageStats(t, repo, sha) assert.Empty(t, langs) }) // 3. Marking Markdown detectable t.Run("detectable markdown", func(t *testing.T) { defer tests.PrintCurrentTest(t)() repo, sha, f := prep(t, "*.md linguist-detectable\n") defer f() langs := getFreshLanguageStats(t, repo, sha) assert.Len(t, langs, 2) assert.Equal(t, "C", langs[0].Language) assert.Equal(t, "Markdown", langs[1].Language) }) // 4. Marking foo.c as documentation t.Run("foo.c as documentation", func(t *testing.T) { defer tests.PrintCurrentTest(t)() repo, sha, f := prep(t, "foo.c linguist-documentation\n") defer f() langs := getFreshLanguageStats(t, repo, sha) assert.Empty(t, langs) }) // 5. Overriding a generated file t.Run("linguist-generated=false", func(t *testing.T) { defer tests.PrintCurrentTest(t)() repo, sha, f := prep(t, "foo.nib linguist-generated=false\nfoo.nib linguist-language=Perl\n") defer f() langs := getFreshLanguageStats(t, repo, sha) assert.Len(t, langs, 2) assert.Equal(t, "C", langs[0].Language) assert.Equal(t, "Perl", langs[1].Language) }) // 6. Disabling vendoring for a file t.Run("linguist-vendored=false", func(t *testing.T) { defer tests.PrintCurrentTest(t)() repo, sha, f := prep(t, "cpplint.py linguist-vendored=false\n") defer f() langs := getFreshLanguageStats(t, repo, sha) assert.Len(t, langs, 2) assert.Equal(t, "C", langs[0].Language) assert.Equal(t, "Python", langs[1].Language) }) // 7. Disabling vendoring for a file, with -linguist-vendored t.Run("-linguist-vendored", func(t *testing.T) { defer tests.PrintCurrentTest(t)() repo, sha, f := prep(t, "cpplint.py -linguist-vendored\n") defer f() langs := getFreshLanguageStats(t, repo, sha) assert.Len(t, langs, 2) assert.Equal(t, "C", langs[0].Language) assert.Equal(t, "Python", langs[1].Language) }) // 8. Marking foo.c as vendored t.Run("foo.c as vendored", func(t *testing.T) { defer tests.PrintCurrentTest(t)() repo, sha, f := prep(t, "foo.c linguist-vendored\n") defer f() langs := getFreshLanguageStats(t, repo, sha) assert.Empty(t, langs) }) }) }