UnityGame/Library/PackageCache/com.unity.searcher/Editor/Searcher/SearcherDatabase.cs

399 lines
13 KiB
C#
Raw Permalink Normal View History

2024-10-27 10:53:47 +03:00
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using JetBrains.Annotations;
using UnityEngine;
namespace UnityEditor.Searcher
{
[PublicAPI]
public class SearcherDatabase : SearcherDatabaseBase
{
// Search index filled by BuildIndex: maps an item's Path to the list of (term, weight) pairs
// that the private Match overload scores query tokens against.
Dictionary<string, IReadOnlyList<ValueTuple<string, float>>> m_Index = new Dictionary<string, IReadOnlyList<ValueTuple<string, float>>>();
// Pairs an item with a score. Used both for individual matches and as the
// accumulator that tracks the best match found so far during a search.
class Result
{
// The matched item; stays null on an accumulator until a match with a score above 0 is recorded.
public SearcherItem item;
// Score of this match, or the highest score seen so far when used as an accumulator.
public float maxScore;
}
// Compile-time switch: when true, Search uses the multi-threaded code paths
// for item lists holding more than 100 entries.
const bool k_IsParallel = true;
// Optional predicate invoked with the (trimmed) query and a candidate item; items for which
// it returns false are left out of the search results. Null means no additional filtering.
public Func<string, SearcherItem, bool> MatchFilter { get; set; }
/// <summary>
/// Builds a database from <paramref name="items"/>, optionally serializes it to
/// <paramref name="databaseDirectory"/>, and builds its search index.
/// </summary>
/// <param name="items">Items to add to the new database.</param>
/// <param name="databaseDirectory">Directory handed to the database; created when missing and serialization is requested.</param>
/// <param name="serializeToFile">When true, the database is written to file before indexing.</param>
/// <returns>The indexed database.</returns>
public static SearcherDatabase Create(
    List<SearcherItem> items,
    string databaseDirectory,
    bool serializeToFile = true
)
{
    // The directory is only created when the database is actually going to be written.
    var directoryIsMissing = serializeToFile
        && databaseDirectory != null
        && !Directory.Exists(databaseDirectory);
    if (directoryIsMissing)
    {
        Directory.CreateDirectory(databaseDirectory);
    }

    var created = new SearcherDatabase(databaseDirectory, items);
    if (serializeToFile)
    {
        created.SerializeToFile();
    }

    created.BuildIndex();
    return created;
}
/// <summary>
/// Loads a previously serialized database from <paramref name="databaseDirectory"/> and builds its search index.
/// </summary>
/// <param name="databaseDirectory">Directory the database is read from.</param>
/// <returns>The loaded and indexed database.</returns>
/// <exception cref="InvalidOperationException">The directory does not exist.</exception>
public static SearcherDatabase Load(string databaseDirectory)
{
    if (Directory.Exists(databaseDirectory))
    {
        // No items are passed in: the content comes from the file.
        var loaded = new SearcherDatabase(databaseDirectory, null);
        loaded.LoadFromFile();
        loaded.BuildIndex();
        return loaded;
    }

    throw new InvalidOperationException("databaseDirectory not found.");
}
/// <summary>
/// Creates a database from <paramref name="db"/> with an empty database directory.
/// Note that this constructor does not build the search index.
/// </summary>
/// <param name="db">Items to add; may be null for an empty database.</param>
public SearcherDatabase(IReadOnlyCollection<SearcherItem> db)
    : this("", db)
{
}

/// <summary>
/// Shared constructor: starts with an empty item list and registers every item of
/// <paramref name="db"/> through AddItemToIndex, with ids counted up from 0.
/// </summary>
SearcherDatabase(string databaseDirectory, IReadOnlyCollection<SearcherItem> db)
    : base(databaseDirectory)
{
    m_ItemList = new List<SearcherItem>();
    if (db == null)
    {
        return;
    }

    var runningId = 0;
    foreach (var entry in db)
    {
        AddItemToIndex(entry, ref runningId, null);
    }
}
/// <summary>
/// Returns the items matching <paramref name="query"/>, best match first.
/// </summary>
/// <param name="query">Text to search for. Null is treated as an empty query.</param>
/// <param name="localMaxScore">Receives the highest match score, or 0 for a blank query or when nothing matched.</param>
/// <returns>
/// For a blank query: the internal item list itself (not a copy) when no MatchFilter is set,
/// otherwise a new list with the items the filter accepts. For any other query: a new list
/// holding the best match followed by the other matches that pass the score cut-off.
/// </returns>
public override List<SearcherItem> Search(string query, out float localMaxScore)
{
    // Match assumes the query is trimmed. A null query would throw a
    // NullReferenceException on Trim, so it is handled like an empty one.
    query = (query ?? string.Empty).Trim(' ', '\t');
    localMaxScore = 0;

    if (string.IsNullOrWhiteSpace(query))
    {
        if (MatchFilter == null)
        {
            return m_ItemList;
        }

        // ReSharper disable once RedundantLogicalConditionalExpressionOperand
        if (k_IsParallel && m_ItemList.Count > 100)
        {
            return FilterMultiThreaded(query);
        }

        return FilterSingleThreaded(query);
    }

    // Slot 0 is reserved for the best match so that it ends up first in the list.
    var finalResults = new List<SearcherItem> { null };
    var max = new Result();

    var tokenizedQuery = new List<string>();
    foreach (var token in Tokenize(query))
    {
        tokenizedQuery.Add(token.Trim().ToLower());
    }

    // ReSharper disable once RedundantLogicalConditionalExpressionOperand
    if (k_IsParallel && m_ItemList.Count > 100)
    {
        SearchMultithreaded(query, tokenizedQuery, max, finalResults);
    }
    else
    {
        SearchSingleThreaded(query, tokenizedQuery, max, finalResults);
    }

    localMaxScore = max.maxScore;

    // Fill the reserved slot with the best match, or drop it when there is none.
    if (max.item != null)
    {
        finalResults[0] = max.item;
    }
    else
    {
        finalResults.RemoveAt(0);
    }

    return finalResults;
}
/// <summary>
/// Decides whether <paramref name="item"/> matches the query: it must score above 0 against
/// the index and be accepted by MatchFilter (when one is set).
/// </summary>
/// <param name="query">The trimmed query text, passed to MatchFilter.</param>
/// <param name="tokenizedQuery">Lower-cased query tokens used for index scoring.</param>
/// <param name="item">Candidate item; its Path is used for the index lookup.</param>
/// <param name="score">Receives the index score, even when the filter rejects the item.</param>
/// <returns>True when both the index match and the filter succeed.</returns>
protected virtual bool Match(string query, IReadOnlyList<string> tokenizedQuery, SearcherItem item, out float score)
{
    // The filter runs first and unconditionally; no filter means every item is accepted.
    var filterCallback = MatchFilter;
    var acceptedByFilter = filterCallback == null || filterCallback.Invoke(query, item);

    var matchedIndex = Match(tokenizedQuery, item.Path, out score);
    return matchedIndex && acceptedByFilter;
}
/// <summary>
/// Returns, in list order, the items that MatchFilter accepts for <paramref name="query"/>.
/// Expects MatchFilter to be set (the caller checks it beforehand).
/// </summary>
List<SearcherItem> FilterSingleThreaded(string query)
{
    return m_ItemList
        .Where(candidate => MatchFilter.Invoke(query, candidate))
        .ToList();
}
/// <summary>
/// Same result as FilterSingleThreaded, but the item list is cut into one contiguous slice per
/// processor and each slice is filtered on its own task. The per-slice results are concatenated
/// in slice order, so the original item order is kept.
/// </summary>
List<SearcherItem> FilterMultiThreaded(string query)
{
    var workerCount = Environment.ProcessorCount;
    var sliceLength = (int)Math.Ceiling(m_ItemList.Count / (float)workerCount);
    var buckets = new List<SearcherItem>[workerCount];
    var workers = new Task[workerCount];

    for (var w = 0; w < workerCount; w++)
    {
        // Copy the loop variable so each lambda captures its own slice number.
        var slice = w;
        workers[slice] = Task.Run(() =>
        {
            var bucket = new List<SearcherItem>();
            buckets[slice] = bucket;

            var first = sliceLength * slice;
            var end = Math.Min(first + sliceLength, m_ItemList.Count);
            for (var index = first; index < end; index++)
            {
                var candidate = m_ItemList[index];
                if (MatchFilter.Invoke(query, candidate))
                {
                    bucket.Add(candidate);
                }
            }
        });
    }

    Task.WaitAll(workers);

    var accepted = new List<SearcherItem>();
    foreach (var bucket in buckets)
    {
        accepted.AddRange(bucket);
    }

    return accepted;
}
// A match is kept only when its score divided by the best score is above this fraction
// (applied in PostprocessResults).
readonly float k_ScoreCutOff = 0.33f;
/// <summary>
/// Scores every item sequentially, records the best match in <paramref name="max"/> and
/// hands all matches to PostprocessResults to fill <paramref name="finalResults"/>.
/// </summary>
/// <param name="query">Trimmed query text; an empty query accepts every item with score 0.</param>
/// <param name="tokenizedQuery">Lower-cased query tokens.</param>
/// <param name="max">Accumulator updated with the highest-scoring item.</param>
/// <param name="finalResults">Collection that receives the retained matches.</param>
void SearchSingleThreaded(string query, IReadOnlyList<string> tokenizedQuery, Result max, ICollection<SearcherItem> finalResults)
{
    var matches = new List<Result>();
    var acceptEverything = query.Length == 0;

    foreach (var candidate in m_ItemList)
    {
        float candidateScore = 0;
        if (!acceptEverything && !Match(query, tokenizedQuery, candidate, out candidateScore))
        {
            continue;
        }

        // Strict comparison: on a tie the earlier item stays the best match.
        if (candidateScore > max.maxScore)
        {
            max.item = candidate;
            max.maxScore = candidateScore;
        }

        matches.Add(new Result { item = candidate, maxScore = candidateScore });
    }

    PostprocessResults(matches, finalResults, max);
}
/// <summary>
/// Parallel counterpart of SearchSingleThreaded: the item list is cut into one contiguous slice
/// per processor, each slice is scored on its own task, and the per-slice results are merged.
/// </summary>
/// <remarks>
/// Each task writes only to its own match list and its own best-match accumulator, and the lists
/// are merged in slice order. The result order is therefore deterministic and identical to the
/// single-threaded path, which a queue shared by all tasks cannot guarantee.
/// </remarks>
/// <param name="query">Trimmed query text; an empty query accepts every item with score 0.</param>
/// <param name="tokenizedQuery">Lower-cased query tokens.</param>
/// <param name="max">Accumulator updated with the highest-scoring item.</param>
/// <param name="finalResults">List that receives the retained matches.</param>
void SearchMultithreaded(string query, IReadOnlyList<string> tokenizedQuery, Result max, List<SearcherItem> finalResults)
{
    var count = Environment.ProcessorCount;
    var tasks = new Task[count];
    var localBest = new Result[count];
    var localMatches = new List<Result>[count];
    var itemsPerTask = (int)Math.Ceiling(m_ItemList.Count / (float)count);

    for (var i = 0; i < count; i++)
    {
        // Copy the loop variable so each lambda captures its own slice number.
        var slice = i;
        localBest[slice] = new Result();
        localMatches[slice] = new List<Result>();

        tasks[slice] = Task.Run(() =>
        {
            var best = localBest[slice];
            var matches = localMatches[slice];
            for (var j = 0; j < itemsPerTask; j++)
            {
                var index = j + itemsPerTask * slice;
                if (index >= m_ItemList.Count)
                {
                    break;
                }

                var item = m_ItemList[index];
                float score = 0;
                if (query.Length == 0 || Match(query, tokenizedQuery, item, out score))
                {
                    if (score > best.maxScore)
                    {
                        best.maxScore = score;
                        best.item = item;
                    }

                    matches.Add(new Result { item = item, maxScore = score });
                }
            }
        });
    }

    Task.WaitAll(tasks);

    // Strict comparison in slice order: on a tie the earliest item wins, as in the sequential search.
    for (var i = 0; i < count; i++)
    {
        if (localBest[i].maxScore > max.maxScore)
        {
            max.maxScore = localBest[i].maxScore;
            max.item = localBest[i].item;
        }
    }

    // Post-process slice by slice to keep the original item order.
    for (var i = 0; i < count; i++)
    {
        PostprocessResults(localMatches[i], finalResults, max);
    }
}
/// <summary>
/// Copies the retained matches from <paramref name="results"/> into <paramref name="items"/>.
/// The best match (<paramref name="max"/>.item) is skipped here because the caller places it
/// itself; every other match is kept when its score relative to the best score is above k_ScoreCutOff.
/// </summary>
/// <param name="results">All matches found by the search.</param>
/// <param name="items">Collection that receives the retained items.</param>
/// <param name="max">Best match and its score, used for normalization.</param>
void PostprocessResults(IEnumerable<Result> results, ICollection<SearcherItem> items, Result max)
{
    // With a best score of 0 the ratio would be 0/0 = NaN. NaN fails every comparison, which
    // would silently discard all matches, so in that case the matches are kept unranked.
    var canNormalize = max.maxScore > 0;

    foreach (var result in results)
    {
        if (result.item == null || result.item == max.item)
        {
            continue;
        }

        if (!canNormalize || result.maxScore / max.maxScore > k_ScoreCutOff)
        {
            items.Add(result.item);
        }
    }
}
/// <summary>
/// Rebuilds the search index from scratch. For each distinct item Path it stores a list of
/// weighted terms: every token longer than one character (0.8), every growing token sequence
/// joined by spaces (1.0), and the item's upper-case initials (0.5).
/// </summary>
public override void BuildIndex()
{
    m_Index.Clear();

    foreach (var entry in m_ItemList)
    {
        // Only the first item seen for a given path is indexed.
        if (m_Index.ContainsKey(entry.Path))
        {
            continue;
        }

        // If the item uses synonyms to return results for similar words/phrases, add them to the search terms
        var searchableText = entry.Synonyms == null
            ? entry.Name
            : string.Format("{0} {1}", entry.Name, string.Join(" ", entry.Synonyms));
        var words = Tokenize(searchableText);

        // Fixes bug: https://fogbugz.unity3d.com/f/cases/1359158/
        // Without this, node names with spaces or those with Pascal casing were not added to index
        words.Add(entry.Name.ToLower().Replace(" ", String.Empty));

        var weightedTerms = new List<ValueTuple<string, float>>();
        var phrase = "";
        foreach (var word in words)
        {
            var lowered = word.ToLower();
            if (lowered.Length > 1)
            {
                weightedTerms.Add(ValueTuple.Create(lowered, 0.8f));
            }

            // The first word only seeds the phrase; each later word extends it and adds a term.
            if (phrase.Length == 0)
            {
                phrase = lowered;
                continue;
            }

            phrase += " " + lowered;
            weightedTerms.Add(ValueTuple.Create(phrase, 1f));
        }

        // Add a term containing all the uppercase letters (CamelCase World BBox => CCWBB)
        var initials = string.Concat(Regex.Split(entry.Name, @"\P{Lu}+")).Trim();
        if (!string.IsNullOrEmpty(initials))
        {
            weightedTerms.Add(ValueTuple.Create(initials.ToLower(), 0.5f));
        }

        m_Index.Add(entry.Path, weightedTerms);
    }
}
/// <summary>
/// Splits <paramref name="s"/> into distinct tokens: first on non-word characters, then on
/// camel-case boundaries. Tokens keep their original casing and first-occurrence order.
/// </summary>
/// <param name="s">Text to tokenize; must not be null.</param>
/// <returns>A new, modifiable list of the distinct non-empty tokens.</returns>
static IList<string> Tokenize(string s)
{
    var seen = new HashSet<string>();
    var ordered = new List<string>();

    // Split on word boundaries
    foreach (var word in Regex.Split(s, @"\W"))
    {
        // Split camel case words
        foreach (var fragment in Regex.Split(word, @"(\p{Lu}+\P{Lu}*)"))
        {
            var piece = fragment.Trim();
            if (piece.Length == 0)
            {
                continue;
            }

            // HashSet.Add returns false for a duplicate, so each token is listed once.
            if (seen.Add(piece))
            {
                ordered.Add(piece);
            }
        }
    }

    return ordered;
}
/// <summary>
/// Scores the query tokens against the index terms stored for <paramref name="itemPath"/>.
/// </summary>
/// <remarks>
/// For each index term, a query token that is a prefix of the term adds
/// weight * tokenLength / termLength; the growing space-joined sequence of query tokens adds a
/// boosted contribution when it is a prefix of the term. The item's score is the best term score.
/// </remarks>
/// <param name="tokenizedQuery">Lower-cased query tokens.</param>
/// <param name="itemPath">Path of the item, exactly as it was used as index key.</param>
/// <param name="score">Receives the best term score, or 0 when nothing matched.</param>
/// <returns>True when the score is above 0 (or for a blank path combined with an empty query).</returns>
bool Match(IReadOnlyList<string> tokenizedQuery, string itemPath, out float score)
{
    // Trimming is only used to detect a blank path. A blank path matches only an empty query.
    if (itemPath.Trim() == "")
    {
        var emptyQuery = tokenizedQuery.Count == 0;
        score = emptyQuery ? 1 : 0;
        return emptyQuery;
    }

    // BuildIndex keys the index by the item's path as-is, so the lookup must use the untrimmed
    // value; looking up the trimmed path would miss items whose path starts or ends with whitespace.
    IReadOnlyList<ValueTuple<string, float>> indexTerms;
    if (!m_Index.TryGetValue(itemPath, out indexTerms))
    {
        score = 0;
        return false;
    }

    float maxScore = 0.0f;
    foreach (var t in indexTerms)
    {
        var term = t.Item1;
        var weight = t.Item2;

        float scoreForTerm = 0f;
        var querySuite = "";
        var querySuiteFactor = 1.25f;
        foreach (var q in tokenizedQuery)
        {
            // Terms and tokens are already lower-cased, so a plain ordinal prefix test is what
            // is needed; the culture-sensitive default is slower and can treat some characters as ignorable.
            if (term.StartsWith(q, StringComparison.Ordinal))
            {
                scoreForTerm += weight * q.Length / term.Length;
            }

            if (querySuite.Length > 0)
            {
                querySuite += " " + q;
                if (term.StartsWith(querySuite, StringComparison.Ordinal))
                {
                    scoreForTerm += weight * querySuiteFactor * querySuite.Length / term.Length;
                }
            }
            else
            {
                querySuite = q;
            }

            // NOTE(review): the factor is squared for every token (1.25, 1.56, 2.44, 5.96, ...),
            // so it grows very quickly with long queries - confirm this is intended.
            querySuiteFactor *= querySuiteFactor;
        }

        maxScore = Mathf.Max(maxScore, scoreForTerm);
    }

    score = maxScore;
    return score > 0;
}
}
}