399 lines
13 KiB
C#
399 lines
13 KiB
C#
using System;
|
|
using System.Collections.Concurrent;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.Linq;
|
|
using System.Text.RegularExpressions;
|
|
using System.Threading.Tasks;
|
|
using JetBrains.Annotations;
|
|
using UnityEngine;
|
|
|
|
namespace UnityEditor.Searcher
|
|
{
|
|
[PublicAPI]
|
|
public class SearcherDatabase : SearcherDatabaseBase
|
|
{
|
|
// Compile-time switch read by Search: when true, lists of more than 100 items
// are processed with the task-based (multi-threaded) code paths.
const bool k_IsParallel = true;

// Search index filled by BuildIndex: maps an item's Path to the list of
// (lower-cased term, weight) pairs that Match scores a query against.
readonly Dictionary<string, IReadOnlyList<ValueTuple<string, float>>> m_Index =
    new Dictionary<string, IReadOnlyList<ValueTuple<string, float>>>();

/// <summary>
/// Optional predicate called with the query and a candidate item.
/// Items for which it returns false are left out of the search results.
/// When null, no filtering is applied.
/// </summary>
public Func<string, SearcherItem, bool> MatchFilter { get; set; }

// Pairs an item with a score. Used both for individual matches and for
// tracking the best-scoring match found so far.
class Result
{
    public SearcherItem item;
    public float maxScore;
}
|
|
|
|
/// <summary>
/// Creates a database from <paramref name="items"/>, optionally serializes it,
/// and builds its search index.
/// </summary>
/// <param name="items">Items to put in the database.</param>
/// <param name="databaseDirectory">
/// Directory handed to the database. When serializing and the directory is
/// non-null and missing, it is created first.
/// </param>
/// <param name="serializeToFile">When true, <c>SerializeToFile</c> is called before the index is built.</param>
/// <returns>The newly created, indexed database.</returns>
public static SearcherDatabase Create(
    List<SearcherItem> items,
    string databaseDirectory,
    bool serializeToFile = true
)
{
    var directoryIsMissing = serializeToFile
        && databaseDirectory != null
        && !Directory.Exists(databaseDirectory);
    if (directoryIsMissing)
    {
        Directory.CreateDirectory(databaseDirectory);
    }

    var created = new SearcherDatabase(databaseDirectory, items);
    if (serializeToFile)
    {
        created.SerializeToFile();
    }

    created.BuildIndex();
    return created;
}
|
|
|
|
/// <summary>
/// Loads a database from <paramref name="databaseDirectory"/> and builds its search index.
/// </summary>
/// <param name="databaseDirectory">Directory that must already exist.</param>
/// <returns>The loaded, indexed database.</returns>
/// <exception cref="InvalidOperationException">The directory does not exist.</exception>
public static SearcherDatabase Load(string databaseDirectory)
{
    if (Directory.Exists(databaseDirectory))
    {
        // Start from an empty database; its content comes from LoadFromFile.
        var loaded = new SearcherDatabase(databaseDirectory, null);
        loaded.LoadFromFile();
        loaded.BuildIndex();
        return loaded;
    }

    throw new InvalidOperationException("databaseDirectory not found.");
}
|
|
|
|
/// <summary>
/// Creates a database over <paramref name="db"/> with an empty database directory.
/// </summary>
/// <param name="db">Items to add; may be null for an empty database.</param>
public SearcherDatabase(IReadOnlyCollection<SearcherItem> db)
    : this("", db)
{
}

// Shared constructor: forwards the directory to the base class, starts with an
// empty item list and registers every item of db (if any) via AddItemToIndex,
// threading a running id counter that starts at 0.
SearcherDatabase(string databaseDirectory, IReadOnlyCollection<SearcherItem> db)
    : base(databaseDirectory)
{
    m_ItemList = new List<SearcherItem>();
    if (db == null)
    {
        return;
    }

    var nextId = 0;
    foreach (var entry in db)
    {
        AddItemToIndex(entry, ref nextId, null);
    }
}
|
|
|
|
/// <summary>
/// Searches the database for items matching <paramref name="query"/>.
/// </summary>
/// <param name="query">
/// Text typed by the user. Leading/trailing spaces and tabs are ignored.
/// A null query is treated like an empty one.
/// </param>
/// <param name="localMaxScore">Score of the best match, or 0 when the query is blank or nothing matched.</param>
/// <returns>
/// For a blank query: the internal item list itself when no <see cref="MatchFilter"/> is set,
/// otherwise a new list of the items accepted by the filter.
/// For a non-blank query: a new list with the best-scoring item first, followed by the
/// other matches that pass the score cut-off.
/// </returns>
public override List<SearcherItem> Search(string query, out float localMaxScore)
{
    // Match assumes the query is trimmed. Guard against null first: the blank
    // check below could never be reached for a null query otherwise.
    query = (query ?? string.Empty).Trim(' ', '\t');
    localMaxScore = 0;

    // ReSharper disable once RedundantLogicalConditionalExpressionOperand
    var useTasks = k_IsParallel && m_ItemList.Count > 100;

    if (string.IsNullOrWhiteSpace(query))
    {
        if (MatchFilter == null)
        {
            return m_ItemList;
        }

        return useTasks ? FilterMultiThreaded(query) : FilterSingleThreaded(query);
    }

    var tokenizedQuery = new List<string>();
    foreach (var token in Tokenize(query))
    {
        tokenizedQuery.Add(token.Trim().ToLower());
    }

    // Slot 0 is reserved for the best match; the search helpers append the rest.
    var finalResults = new List<SearcherItem> { null };
    var max = new Result();

    if (useTasks)
    {
        SearchMultithreaded(query, tokenizedQuery, max, finalResults);
    }
    else
    {
        SearchSingleThreaded(query, tokenizedQuery, max, finalResults);
    }

    localMaxScore = max.maxScore;
    if (max.item != null)
    {
        finalResults[0] = max.item;
    }
    else
    {
        // Nothing scored above zero: drop the unused placeholder.
        finalResults.RemoveAt(0);
    }

    return finalResults;
}
|
|
|
|
/// <summary>
/// Decides whether <paramref name="item"/> matches the query and computes its score.
/// </summary>
/// <param name="query">The trimmed query text, forwarded to <see cref="MatchFilter"/>.</param>
/// <param name="tokenizedQuery">The lower-cased query tokens scored against the index.</param>
/// <param name="item">Candidate item; its Path is used for the index lookup.</param>
/// <param name="score">Score produced by the index match (set even when the filter rejects the item).</param>
/// <returns>True when the index match succeeds and the filter (if any) accepts the item.</returns>
protected virtual bool Match(string query, IReadOnlyList<string> tokenizedQuery, SearcherItem item, out float score)
{
    // The filter is evaluated first and unconditionally; no filter means "accept".
    var filter = MatchFilter;
    var acceptedByFilter = filter == null || filter.Invoke(query, item);

    var matchesIndex = Match(tokenizedQuery, item.Path, out score);
    return matchesIndex && acceptedByFilter;
}
|
|
|
|
// Returns a new list with every item accepted by MatchFilter, in item-list order.
// Only called when MatchFilter is non-null (see Search).
List<SearcherItem> FilterSingleThreaded(string query)
{
    return m_ItemList
        .Where(candidate => MatchFilter.Invoke(query, candidate))
        .ToList();
}
|
|
|
|
// Same result as FilterSingleThreaded, computed by splitting the item list into
// one contiguous chunk per processor and filtering each chunk in its own task.
// Per-chunk results are concatenated in chunk order, so item-list order is kept.
List<SearcherItem> FilterMultiThreaded(string query)
{
    var workerCount = Environment.ProcessorCount;
    var chunkSize = (int)Math.Ceiling(m_ItemList.Count / (float)workerCount);
    var workers = new Task[workerCount];
    var acceptedPerChunk = new List<SearcherItem>[workerCount];

    for (var worker = 0; worker < workerCount; worker++)
    {
        // Copy the loop variable so each lambda captures its own chunk number.
        var chunk = worker;
        workers[worker] = Task.Run(() =>
        {
            acceptedPerChunk[chunk] = new List<SearcherItem>();

            var first = chunkSize * chunk;
            for (var index = first; index - first < chunkSize && index < m_ItemList.Count; index++)
            {
                var candidate = m_ItemList[index];
                if (MatchFilter.Invoke(query, candidate))
                {
                    acceptedPerChunk[chunk].Add(candidate);
                }
            }
        });
    }

    Task.WaitAll(workers);

    var merged = new List<SearcherItem>();
    foreach (var accepted in acceptedPerChunk)
    {
        merged.AddRange(accepted);
    }

    return merged;
}
|
|
|
|
// Matches whose score, relative to the best match's score, does not exceed this
// ratio are dropped by PostprocessResults. Declared const (like k_IsParallel)
// rather than as a per-instance readonly field, since the value never varies.
const float k_ScoreCutOff = 0.33f;
|
|
|
|
// Scores every item on the calling thread. Updates max with the best-scoring
// match, then hands all matches to PostprocessResults, which appends the ones
// worth keeping to finalResults.
void SearchSingleThreaded(string query, IReadOnlyList<string> tokenizedQuery, Result max, ICollection<SearcherItem> finalResults)
{
    var matches = new List<Result>();

    foreach (var candidate in m_ItemList)
    {
        float candidateScore = 0;
        // An empty query accepts every item with a score of 0.
        var accepted = query.Length == 0 || Match(query, tokenizedQuery, candidate, out candidateScore);
        if (!accepted)
        {
            continue;
        }

        // Strictly greater: on ties the earliest item stays the best match.
        if (candidateScore > max.maxScore)
        {
            max.item = candidate;
            max.maxScore = candidateScore;
        }

        matches.Add(new Result { item = candidate, maxScore = candidateScore });
    }

    PostprocessResults(matches, finalResults, max);
}
|
|
|
|
// Scores the items in parallel: the item list is split into one contiguous chunk
// per processor and each chunk is scored in its own task. Updates max with the
// overall best match, then hands all matches to PostprocessResults.
//
// Each task collects its matches in its own list, and the lists are merged in
// chunk order once all tasks have finished. This keeps the result order identical
// to SearchSingleThreaded (item-list order) instead of depending on how the
// tasks happen to be scheduled, mirroring what FilterMultiThreaded does.
void SearchMultithreaded(string query, IReadOnlyList<string> tokenizedQuery, Result max, List<SearcherItem> finalResults)
{
    var count = Environment.ProcessorCount;
    var tasks = new Task[count];
    var localResults = new Result[count];
    var localMatches = new List<Result>[count];
    var itemsPerTask = (int)Math.Ceiling(m_ItemList.Count / (float)count);

    for (var i = 0; i < count; i++)
    {
        // Copy the loop variable so each lambda captures its own chunk number.
        var i1 = i;
        localResults[i1] = new Result();
        localMatches[i1] = new List<Result>();
        tasks[i] = Task.Run(() =>
        {
            var best = localResults[i1];
            var matches = localMatches[i1];
            for (var j = 0; j < itemsPerTask; j++)
            {
                var index = j + itemsPerTask * i1;
                if (index >= m_ItemList.Count)
                    break;

                var item = m_ItemList[index];
                float score = 0;
                if (query.Length == 0 || Match(query, tokenizedQuery, item, out score))
                {
                    // Strictly greater: on ties the earliest item of the chunk wins.
                    if (score > best.maxScore)
                    {
                        best.maxScore = score;
                        best.item = item;
                    }

                    matches.Add(new Result { item = item, maxScore = score });
                }
            }
        });
    }

    Task.WaitAll(tasks);

    var results = new List<Result>();
    for (var i = 0; i < count; i++)
    {
        // Chunks are visited in order, so ties across chunks also go to the earliest item.
        if (localResults[i].maxScore > max.maxScore)
        {
            max.maxScore = localResults[i].maxScore;
            max.item = localResults[i].item;
        }

        results.AddRange(localMatches[i]);
    }

    PostprocessResults(results, finalResults, max);
}
|
|
|
|
// Appends to items every result that is worth showing: it must have an item,
// must not be the best match itself (max.item), and its score divided by the
// best score must be above k_ScoreCutOff.
void PostprocessResults(IEnumerable<Result> results, ICollection<SearcherItem> items, Result max)
{
    foreach (var entry in results)
    {
        var candidate = entry.item;
        var isSecondaryMatch = candidate != null && candidate != max.item;

        // Score relative to the best match.
        var relativeScore = entry.maxScore / max.maxScore;
        if (isSecondaryMatch && relativeScore > k_ScoreCutOff)
        {
            items.Add(candidate);
        }
    }
}
|
|
|
|
/// <summary>
/// Rebuilds the search index from the current item list.
/// For each distinct item path, the index stores a list of weighted, lower-cased terms:
/// every token longer than one character (weight 0.8), every growing space-separated
/// run of tokens starting at the first one (weight 1), and the item's upper-case
/// letters joined together (weight 0.5).
/// </summary>
public override void BuildIndex()
{
    m_Index.Clear();

    foreach (var entry in m_ItemList)
    {
        // Only the first item seen for a given path gets indexed.
        if (m_Index.ContainsKey(entry.Path))
        {
            continue;
        }

        var weightedTerms = new List<ValueTuple<string, float>>();

        // If the item uses synonyms to return results for similar words/phrases, add them to the search terms
        var textToTokenize = entry.Synonyms == null
            ? entry.Name
            : string.Format("{0} {1}", entry.Name, string.Join(" ", entry.Synonyms));
        IList<string> words = Tokenize(textToTokenize);

        // Fixes bug: https://fogbugz.unity3d.com/f/cases/1359158/
        // Without this, node names with spaces or those with Pascal casing were not added to index
        words.Add(entry.Name.ToLower().Replace(" ", String.Empty));

        // Accumulates "first", "first second", "first second third", ...
        var phraseSoFar = "";
        foreach (var word in words)
        {
            var lowered = word.ToLower();
            if (lowered.Length > 1)
            {
                weightedTerms.Add(new ValueTuple<string, float>(lowered, 0.8f));
            }

            if (phraseSoFar.Length == 0)
            {
                // The first token only seeds the phrase; it is not added as a phrase term.
                phraseSoFar = lowered;
                continue;
            }

            phraseSoFar += " " + lowered;
            weightedTerms.Add(new ValueTuple<string, float>(phraseSoFar, 1f));
        }

        // Add a term containing all the uppercase letters (CamelCase World BBox => CCWBB)
        var uppercaseRuns = Regex.Split(entry.Name, @"\P{Lu}+");
        var acronym = string.Concat(uppercaseRuns).Trim();
        if (!string.IsNullOrEmpty(acronym))
        {
            weightedTerms.Add(new ValueTuple<string, float>(acronym.ToLower(), 0.5f));
        }

        m_Index.Add(entry.Path, weightedTerms);
    }
}
|
|
|
|
// Splits s into tokens: first on non-word characters, then each word at its
// upper-case runs (camel/Pascal case). Empty pieces are dropped and each distinct
// token (case-sensitive) is returned once, in order of first appearance.
static IList<string> Tokenize(string s)
{
    var seen = new HashSet<string>();
    var ordered = new List<string>();

    // Split on word boundaries
    foreach (var word in Regex.Split(s, @"\W"))
    {
        // Split camel case words; the capture group keeps the matched pieces in the output
        foreach (var piece in Regex.Split(word, @"(\p{Lu}+\P{Lu}*)"))
        {
            var token = piece.Trim();
            if (string.IsNullOrEmpty(token))
            {
                continue;
            }

            // HashSet.Add returns false for a token that was already recorded.
            if (seen.Add(token))
            {
                ordered.Add(token);
            }
        }
    }

    return ordered;
}
|
|
|
|
// Scores the query tokens against the index terms stored for itemPath.
//
// tokenizedQuery: lower-cased query tokens.
// itemPath:       the item's path, exactly as used as a key by BuildIndex.
// score:          the best score over all index terms of the item (0 when there is no match).
// Returns true when the score is greater than 0. A blank path matches (with
// score 1) only when the query has no tokens.
bool Match(IReadOnlyList<string> tokenizedQuery, string itemPath, out float score)
{
    if (itemPath.Trim() == "")
    {
        var emptyQuery = tokenizedQuery.Count == 0;
        score = emptyQuery ? 1 : 0;
        return emptyQuery;
    }

    // Look the item up with the path exactly as BuildIndex stored it. Using a
    // trimmed copy here would miss every item whose path starts or ends with
    // whitespace, because the index keys are not trimmed.
    IReadOnlyList<ValueTuple<string, float>> indexTerms;
    if (!m_Index.TryGetValue(itemPath, out indexTerms))
    {
        score = 0;
        return false;
    }

    float maxScore = 0.0f;
    foreach (var t in indexTerms)
    {
        float scoreForTerm = 0f;
        // Space-separated run of the query tokens seen so far ("a", "a b", "a b c", ...).
        var querySuite = "";
        var querySuiteFactor = 1.25f;
        foreach (var q in tokenizedQuery)
        {
            // Ordinal comparison: terms and tokens are plain lower-cased identifiers,
            // so culture-sensitive prefix matching is neither needed nor wanted.
            if (t.Item1.StartsWith(q, StringComparison.Ordinal))
            {
                // Term weight scaled by how much of the term the token covers.
                scoreForTerm += t.Item2 * q.Length / t.Item1.Length;
            }

            if (querySuite.Length > 0)
            {
                querySuite += " " + q;
                if (t.Item1.StartsWith(querySuite, StringComparison.Ordinal))
                {
                    // Bonus for matching several consecutive tokens of the term.
                    scoreForTerm += t.Item2 * querySuiteFactor * querySuite.Length / t.Item1.Length;
                }
            }
            else
            {
                querySuite = q;
            }

            // NOTE(review): the factor is squared on every token, including the first
            // (1.25, 1.5625, 2.44, 5.96, ...) - confirm this growth rate is intended.
            querySuiteFactor *= querySuiteFactor;
        }

        maxScore = Mathf.Max(maxScore, scoreForTerm);
    }

    score = maxScore;
    return score > 0;
}
|
|
}
|
|
}
|