// Copyright 2024 The Forgejo Authors c/o Codeberg e.V.. All rights reserved.
// SPDX-License-Identifier: MIT

package integration

import (
	"context"
	"net/url"
	"strings"
	"testing"
	"time"

	"code.gitea.io/gitea/models/db"
	repo_model "code.gitea.io/gitea/models/repo"
	"code.gitea.io/gitea/models/unittest"
	user_model "code.gitea.io/gitea/models/user"
	"code.gitea.io/gitea/modules/git"
	"code.gitea.io/gitea/modules/indexer/stats"
	"code.gitea.io/gitea/modules/queue"
	repo_service "code.gitea.io/gitea/services/repository"
	files_service "code.gitea.io/gitea/services/repository/files"
	"code.gitea.io/gitea/tests"

	"github.com/stretchr/testify/assert"
)

func createLangStatTestRepo(t *testing.T) (*repo_model.Repository, func()) {
	t.Helper()

	user2 := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2})

	// Create a new repository
	repo, err := repo_service.CreateRepository(db.DefaultContext, user2, user2, repo_service.CreateRepoOptions{
		Name:          "lang-stat-test",
		Description:   "minimal repo for language stats testing",
		AutoInit:      true,
		Gitignores:    "Go",
		License:       "MIT",
		Readme:        "Default",
		DefaultBranch: "main",
		IsPrivate:     false,
	})
	assert.NoError(t, err)
	assert.NotEmpty(t, repo)

	return repo, func() {
		repo_service.DeleteRepository(db.DefaultContext, user2, repo, false)
	}
}

func addLangStatTestFiles(t *testing.T, repo *repo_model.Repository, contents string) string {
	t.Helper()

	owner := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: repo.OwnerID})

	addFilesResp, err := files_service.ChangeRepoFiles(git.DefaultContext, repo, owner, &files_service.ChangeRepoFilesOptions{
		Files: []*files_service.ChangeRepoFile{
			{
				Operation:     "create",
				TreePath:      ".gitattributes",
				ContentReader: strings.NewReader(contents),
			},
			{
				Operation:     "create",
				TreePath:      "docs.md",
				ContentReader: strings.NewReader("This **is** a `markdown` file.\n"),
			},
			{
				Operation:     "create",
				TreePath:      "foo.c",
				ContentReader: strings.NewReader(`#include <stdio.h>\nint main() {\n  printf("Hello world!\n");\n  return 0;\n}\n`),
			},
			{
				Operation:     "create",
				TreePath:      "foo.nib",
				ContentReader: strings.NewReader("Pinky promise, this is not a generated file!\n"),
			},
			{
				Operation:     "create",
				TreePath:      ".dot.pas",
				ContentReader: strings.NewReader("program Hello;\nbegin\n  writeln('Hello, world.');\nend.\n"),
			},
			{
				Operation:     "create",
				TreePath:      "cpplint.py",
				ContentReader: strings.NewReader(`#! /usr/bin/env python\n\nprint("Hello world!")\n`),
			},
			{
				Operation:     "create",
				TreePath:      "some-file.xml",
				ContentReader: strings.NewReader(`<?xml version="1.0"?>\n<foo>\n <bar>Hello</bar>\n</foo>\n`),
			},
		},
		Message:   "add files",
		OldBranch: "main",
		NewBranch: "main",
		Author: &files_service.IdentityOptions{
			Name:  owner.Name,
			Email: owner.Email,
		},
		Committer: &files_service.IdentityOptions{
			Name:  owner.Name,
			Email: owner.Email,
		},
		Dates: &files_service.CommitDateOptions{
			Author:    time.Now(),
			Committer: time.Now(),
		},
	})
	assert.NoError(t, err)
	assert.NotEmpty(t, addFilesResp)

	return addFilesResp.Commit.SHA
}

func TestRepoLangStats(t *testing.T) {
	onGiteaRun(t, func(t *testing.T, u *url.URL) {
		/******************
		 ** Preparations **
		 ******************/
		prep := func(t *testing.T, attribs string) (*repo_model.Repository, string, func()) {
			t.Helper()

			repo, f := createLangStatTestRepo(t)
			sha := addLangStatTestFiles(t, repo, attribs)

			return repo, sha, f
		}

		getFreshLanguageStats := func(t *testing.T, repo *repo_model.Repository, sha string) repo_model.LanguageStatList {
			t.Helper()

			err := stats.UpdateRepoIndexer(repo)
			assert.NoError(t, err)

			assert.NoError(t, queue.GetManager().FlushAll(context.Background(), 10*time.Second))

			status, err := repo_model.GetIndexerStatus(db.DefaultContext, repo, repo_model.RepoIndexerTypeStats)
			assert.NoError(t, err)
			assert.Equal(t, sha, status.CommitSha)
			langs, err := repo_model.GetTopLanguageStats(db.DefaultContext, repo, 5)
			assert.NoError(t, err)

			return langs
		}

		/***********
		 ** Tests **
		 ***********/

		// 1. By default, documentation is not indexed
		t.Run("default", func(t *testing.T) {
			defer tests.PrintCurrentTest(t)()

			repo, sha, f := prep(t, "")
			defer f()

			langs := getFreshLanguageStats(t, repo, sha)

			// While this is a fairly short test, this exercises a number of
			// things:
			//
			// - `.gitattributes` is empty, so `isDetectable.IsFalse()`,
			//   `isVendored.IsTrue()`, and `isDocumentation.IsTrue()` will be
			//   false for every file, because these are only true if an
			//   attribute is explicitly set.
			//
			// - There is `.dot.pas`, which would be considered Pascal source,
			//   but it is a dotfile (thus, `enry.IsDotFile()` applies), and as
			//   such, is not considered.
			//
			// - `some-file.xml` will be skipped because Enry considers XML
			//   configuration, and `enry.IsConfiguration()` will catch it.
			//
			// - `!isVendored.IsFalse()` evaluates to true, so
			//   `analyze.isVendor()` will be called on `cpplint.py`, which will
			//   be considered vendored, even though both the filename and
			//   contents would otherwise make it Python.
			//
			// - `!isDocumentation.IsFalse()` evaluates to true, so
			//   `enry.IsDocumentation()` will be called for `docs.md`, and will
			//   be considered documentation, thus, skipped.
			//
			// Thus, this exercises all of the conditions in the first big if
			// that is supposed to filter out files early. With two short asserts!

			assert.Len(t, langs, 1)
			assert.Equal(t, "C", langs[0].Language)
		})

		// 2. Marking foo.c as non-detectable
		t.Run("foo.c non-detectable", func(t *testing.T) {
			defer tests.PrintCurrentTest(t)()

			repo, sha, f := prep(t, "foo.c linguist-detectable=false\n")
			defer f()

			langs := getFreshLanguageStats(t, repo, sha)
			assert.Empty(t, langs)
		})

		// 3. Marking Markdown detectable
		t.Run("detectable markdown", func(t *testing.T) {
			defer tests.PrintCurrentTest(t)()

			repo, sha, f := prep(t, "*.md linguist-detectable\n")
			defer f()

			langs := getFreshLanguageStats(t, repo, sha)
			assert.Len(t, langs, 2)
			assert.Equal(t, "C", langs[0].Language)
			assert.Equal(t, "Markdown", langs[1].Language)
		})

		// 4. Marking foo.c as documentation
		t.Run("foo.c as documentation", func(t *testing.T) {
			defer tests.PrintCurrentTest(t)()

			repo, sha, f := prep(t, "foo.c linguist-documentation\n")
			defer f()

			langs := getFreshLanguageStats(t, repo, sha)
			assert.Empty(t, langs)
		})

		// 5. Overriding a generated file
		t.Run("linguist-generated=false", func(t *testing.T) {
			defer tests.PrintCurrentTest(t)()

			repo, sha, f := prep(t, "foo.nib linguist-generated=false\nfoo.nib linguist-language=Perl\n")
			defer f()

			langs := getFreshLanguageStats(t, repo, sha)
			assert.Len(t, langs, 2)
			assert.Equal(t, "C", langs[0].Language)
			assert.Equal(t, "Perl", langs[1].Language)
		})

		// 6. Disabling vendoring for a file
		t.Run("linguist-vendored=false", func(t *testing.T) {
			defer tests.PrintCurrentTest(t)()

			repo, sha, f := prep(t, "cpplint.py linguist-vendored=false\n")
			defer f()

			langs := getFreshLanguageStats(t, repo, sha)
			assert.Len(t, langs, 2)
			assert.Equal(t, "C", langs[0].Language)
			assert.Equal(t, "Python", langs[1].Language)
		})

		// 7. Disabling vendoring for a file, with -linguist-vendored
		t.Run("-linguist-vendored", func(t *testing.T) {
			defer tests.PrintCurrentTest(t)()

			repo, sha, f := prep(t, "cpplint.py -linguist-vendored\n")
			defer f()

			langs := getFreshLanguageStats(t, repo, sha)
			assert.Len(t, langs, 2)
			assert.Equal(t, "C", langs[0].Language)
			assert.Equal(t, "Python", langs[1].Language)
		})

		// 8. Marking foo.c as vendored
		t.Run("foo.c as vendored", func(t *testing.T) {
			defer tests.PrintCurrentTest(t)()

			repo, sha, f := prep(t, "foo.c linguist-vendored\n")
			defer f()

			langs := getFreshLanguageStats(t, repo, sha)
			assert.Empty(t, langs)
		})
	})
}