using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using JetBrains.Annotations;
using UnityEngine;

namespace UnityEditor.Searcher
{
    /// <summary>
    /// A searcher database that scores items against a text query using an index of
    /// weighted, lower-cased terms built from each item's name and synonyms.
    /// </summary>
    [PublicAPI]
    public class SearcherDatabase : SearcherDatabaseBase
    {
        // Maps an item path to the (term, weight) pairs produced by BuildIndex for that path.
        Dictionary<string, IReadOnlyList<ValueTuple<string, float>>> m_Index =
            new Dictionary<string, IReadOnlyList<ValueTuple<string, float>>>();

        // Pairs an item with the score it obtained for the current query.
        class Result
        {
            public SearcherItem item;
            public float maxScore;
        }

        const bool k_IsParallel = true;

        // Item lists larger than this are processed on several tasks when k_IsParallel is set.
        const int k_ParallelThreshold = 100;

        // Results whose score, relative to the best score, is not above this value are dropped.
        const float k_ScoreCutOff = 0.33f;

        /// <summary>
        /// Optional predicate receiving the query and a candidate item; items for which it
        /// returns false are excluded from the results. When null, no filtering is applied.
        /// </summary>
        public Func<string, SearcherItem, bool> MatchFilter { get; set; }

        /// <summary>
        /// Creates a database from <paramref name="items"/>, optionally serializes it, and builds its index.
        /// </summary>
        /// <param name="items">Items to add to the database.</param>
        /// <param name="databaseDirectory">Directory handed to the base class; created if missing when serializing.</param>
        /// <param name="serializeToFile">When true, <c>SerializeToFile</c> is called before indexing.</param>
        /// <returns>The new, indexed database.</returns>
        public static SearcherDatabase Create(
            List<SearcherItem> items,
            string databaseDirectory,
            bool serializeToFile = true
        )
        {
            if (serializeToFile && databaseDirectory != null && !Directory.Exists(databaseDirectory))
                Directory.CreateDirectory(databaseDirectory);

            var database = new SearcherDatabase(databaseDirectory, items);

            if (serializeToFile)
                database.SerializeToFile();

            database.BuildIndex();
            return database;
        }

        /// <summary>
        /// Loads a database through <c>LoadFromFile</c> and builds its index.
        /// </summary>
        /// <param name="databaseDirectory">Directory to load from; must exist.</param>
        /// <returns>The loaded, indexed database.</returns>
        /// <exception cref="InvalidOperationException">The directory does not exist.</exception>
        public static SearcherDatabase Load(string databaseDirectory)
        {
            if (!Directory.Exists(databaseDirectory))
                throw new InvalidOperationException("databaseDirectory not found.");

            var database = new SearcherDatabase(databaseDirectory, null);
            database.LoadFromFile();
            database.BuildIndex();

            return database;
        }

        /// <summary>
        /// Creates a database from <paramref name="db"/> with an empty database directory.
        /// Note that this constructor does not call <see cref="BuildIndex"/>.
        /// </summary>
        /// <param name="db">Items to add; may be null.</param>
        public SearcherDatabase(IReadOnlyCollection<SearcherItem> db)
            : this("", db)
        {
        }

        SearcherDatabase(string databaseDirectory, IReadOnlyCollection<SearcherItem> db)
            : base(databaseDirectory)
        {
            m_ItemList = new List<SearcherItem>();
            var nextId = 0;

            if (db != null)
            {
                foreach (var item in db)
                    AddItemToIndex(item, ref nextId, null);
            }
        }

        /// <summary>
        /// Searches the database for items matching <paramref name="query"/>.
        /// </summary>
        /// <param name="query">Text to search for; null is treated as an empty query.</param>
        /// <param name="localMaxScore">Best score found, or 0 when the query is blank.</param>
        /// <returns>
        /// For a blank query: the internal item list itself when <see cref="MatchFilter"/> is null,
        /// otherwise the items accepted by the filter. For a non-blank query: the best-scoring item
        /// first, followed by the other matches whose normalized score is above the cut-off
        /// (those are not sorted by score).
        /// </returns>
        public override List<SearcherItem> Search(string query, out float localMaxScore)
        {
            // Match assumes the query is trimmed
            query = (query ?? string.Empty).Trim(' ', '\t');
            localMaxScore = 0;

            if (string.IsNullOrWhiteSpace(query))
            {
                if (MatchFilter == null)
                    return m_ItemList;

                // ReSharper disable once RedundantLogicalConditionalExpressionOperand
                if (k_IsParallel && m_ItemList.Count > k_ParallelThreshold)
                    return FilterMultiThreaded(query);

                return FilterSingleThreaded(query);
            }

            // Slot 0 is reserved for the best match; it is filled in or removed below.
            var finalResults = new List<SearcherItem> { null };
            var max = new Result();

            // Index terms are lower-cased with the invariant culture, so the query must be too.
            var tokenizedQuery = new List<string>();
            foreach (var token in Tokenize(query))
            {
                tokenizedQuery.Add(token.Trim().ToLowerInvariant());
            }

            // ReSharper disable once RedundantLogicalConditionalExpressionOperand
            if (k_IsParallel && m_ItemList.Count > k_ParallelThreshold)
                SearchMultithreaded(query, tokenizedQuery, max, finalResults);
            else
                SearchSingleThreaded(query, tokenizedQuery, max, finalResults);

            localMaxScore = max.maxScore;
            if (max.item != null)
                finalResults[0] = max.item;
            else
                finalResults.RemoveAt(0);

            return finalResults;
        }

        /// <summary>
        /// Decides whether <paramref name="item"/> matches the query and computes its score.
        /// </summary>
        /// <param name="query">The trimmed query text, passed to <see cref="MatchFilter"/>.</param>
        /// <param name="tokenizedQuery">Lower-cased query tokens.</param>
        /// <param name="item">Candidate item; its path is looked up in the index.</param>
        /// <param name="score">Score of the item against the index (set even if the filter rejects it).</param>
        /// <returns>True when the item scores above 0 and is accepted by the filter, if any.</returns>
        protected virtual bool Match(string query, IReadOnlyList<string> tokenizedQuery, SearcherItem item, out float score)
        {
            // The filter is evaluated for every item, before the index lookup.
            var filter = MatchFilter?.Invoke(query, item) ?? true;
            return Match(tokenizedQuery, item.Path, out score) && filter;
        }

        // Returns the items accepted by MatchFilter, in item list order. MatchFilter must not be null.
        List<SearcherItem> FilterSingleThreaded(string query)
        {
            var result = new List<SearcherItem>();

            foreach (var searcherItem in m_ItemList)
            {
                if (!MatchFilter.Invoke(query, searcherItem))
                    continue;

                result.Add(searcherItem);
            }

            return result;
        }

        // Same as FilterSingleThreaded, but splits the item list into one contiguous slice per
        // processor. Slices are concatenated in order, so the item list order is preserved.
        List<SearcherItem> FilterMultiThreaded(string query)
        {
            var result = new List<SearcherItem>();
            var count = Environment.ProcessorCount;
            var tasks = new Task[count];
            var lists = new List<SearcherItem>[count];
            var itemsPerTask = (int)Math.Ceiling(m_ItemList.Count / (float)count);

            for (var i = 0; i < count; i++)
            {
                // Copy the loop variable so each closure captures its own slice number.
                var i1 = i;
                tasks[i] = Task.Run(() =>
                {
                    lists[i1] = new List<SearcherItem>();

                    for (var j = 0; j < itemsPerTask; j++)
                    {
                        var index = j + itemsPerTask * i1;
                        if (index >= m_ItemList.Count)
                            break;

                        var item = m_ItemList[index];
                        if (!MatchFilter.Invoke(query, item))
                            continue;

                        lists[i1].Add(item);
                    }
                });
            }

            Task.WaitAll(tasks);

            for (var i = 0; i < count; i++)
            {
                result.AddRange(lists[i]);
            }

            return result;
        }

        // Scores every item on the calling thread, tracks the best one in max, then appends
        // the remaining matches above the cut-off to finalResults.
        void SearchSingleThreaded(string query, IReadOnlyList<string> tokenizedQuery, Result max, ICollection<SearcherItem> finalResults)
        {
            var results = new List<Result>();

            foreach (var item in m_ItemList)
            {
                float score = 0;
                if (query.Length == 0 || Match(query, tokenizedQuery, item, out score))
                {
                    if (score > max.maxScore)
                    {
                        max.item = item;
                        max.maxScore = score;
                    }

                    results.Add(new Result { item = item, maxScore = score });
                }
            }

            PostprocessResults(results, finalResults, max);
        }

        // Same as SearchSingleThreaded, but scores one contiguous slice of the item list per
        // processor. Matches are gathered in a concurrent queue, so their relative order
        // depends on task scheduling.
        void SearchMultithreaded(string query, IReadOnlyList<string> tokenizedQuery, Result max, List<SearcherItem> finalResults)
        {
            var count = Environment.ProcessorCount;
            var tasks = new Task[count];
            var localResults = new Result[count];
            var queue = new ConcurrentQueue<Result>();
            var itemsPerTask = (int)Math.Ceiling(m_ItemList.Count / (float)count);

            for (var i = 0; i < count; i++)
            {
                // Copy the loop variable so each closure captures its own slice number.
                var i1 = i;
                localResults[i1] = new Result();
                tasks[i] = Task.Run(() =>
                {
                    // Each task tracks its own best match; they are merged after WaitAll.
                    var result = localResults[i1];
                    for (var j = 0; j < itemsPerTask; j++)
                    {
                        var index = j + itemsPerTask * i1;
                        if (index >= m_ItemList.Count)
                            break;

                        var item = m_ItemList[index];
                        float score = 0;
                        if (query.Length == 0 || Match(query, tokenizedQuery, item, out score))
                        {
                            if (score > result.maxScore)
                            {
                                result.maxScore = score;
                                result.item = item;
                            }

                            queue.Enqueue(new Result { item = item, maxScore = score });
                        }
                    }
                });
            }

            Task.WaitAll(tasks);

            for (var i = 0; i < count; i++)
            {
                if (localResults[i].maxScore > max.maxScore)
                {
                    max.maxScore = localResults[i].maxScore;
                    max.item = localResults[i].item;
                }
            }

            PostprocessResults(queue, finalResults, max);
        }

        // Appends to items every result other than the best one whose score, divided by the
        // best score, is above k_ScoreCutOff.
        void PostprocessResults(IEnumerable<Result> results, ICollection<SearcherItem> items, Result max)
        {
            foreach (var result in results)
            {
                // If the best score is 0 this is 0/0 = NaN, which never compares greater
                // than the cut-off, so nothing is added in that case.
                var normalizedScore = result.maxScore / max.maxScore;
                if (result.item != null && result.item != max.item && normalizedScore > k_ScoreCutOff)
                {
                    items.Add(result.item);
                }
            }
        }

        /// <summary>
        /// Rebuilds the term index from the current item list. Only the first item seen for a
        /// given path contributes terms. Each path gets: every token longer than one character
        /// (weight 0.8), every cumulative space-joined token sequence of two or more tokens
        /// (weight 1), and the concatenated uppercase letters of the name (weight 0.5).
        /// </summary>
        public override void BuildIndex()
        {
            m_Index.Clear();

            foreach (var item in m_ItemList)
            {
                if (m_Index.ContainsKey(item.Path))
                    continue;

                var terms = new List<ValueTuple<string, float>>();

                // If the item uses synonyms to return results for similar words/phrases, add them to the search terms
                IList<string> tokens;
                if (item.Synonyms == null)
                    tokens = Tokenize(item.Name);
                else
                    tokens = Tokenize(string.Format("{0} {1}", item.Name, string.Join(" ", item.Synonyms)));

                // Fixes bug: https://fogbugz.unity3d.com/f/cases/1359158/
                // Without this, node names with spaces or those with Pascal casing were not added to index
                var nodeName = item.Name.ToLowerInvariant().Replace(" ", String.Empty);
                tokens.Add(nodeName);

                string tokenSuite = "";
                foreach (var token in tokens)
                {
                    // Invariant lower-casing keeps the index independent of the current culture.
                    var t = token.ToLowerInvariant();
                    if (t.Length > 1)
                    {
                        terms.Add(new ValueTuple<string, float>(t, 0.8f));
                    }

                    if (tokenSuite.Length > 0)
                    {
                        tokenSuite += " " + t;
                        terms.Add(new ValueTuple<string, float>(tokenSuite, 1f));
                    }
                    else
                    {
                        tokenSuite = t;
                    }
                }

                // Add a term containing all the uppercase letters (CamelCase World BBox => CCWBB)
                var initialList = Regex.Split(item.Name, @"\P{Lu}+");
                var initials = string.Concat(initialList).Trim();
                if (!string.IsNullOrEmpty(initials))
                    terms.Add(new ValueTuple<string, float>(initials.ToLowerInvariant(), 0.5f));

                m_Index.Add(item.Path, terms);
            }
        }

        // Splits s on non-word characters and then on camel-case boundaries, returning the
        // distinct (case-sensitive) non-empty tokens in order of first appearance.
        static IList<string> Tokenize(string s)
        {
            var knownTokens = new HashSet<string>();
            var tokens = new List<string>();

            // Split on word boundaries
            foreach (var t in Regex.Split(s, @"\W"))
            {
                // Split camel case words
                var tt = Regex.Split(t, @"(\p{Lu}+\P{Lu}*)");
                foreach (var ttt in tt)
                {
                    var tttt = ttt.Trim();
                    if (!string.IsNullOrEmpty(tttt) && !knownTokens.Contains(tttt))
                    {
                        knownTokens.Add(tttt);
                        tokens.Add(tttt);
                    }
                }
            }

            return tokens;
        }

        // Scores the indexed terms of itemPath against the query tokens. The score is the best
        // per-term score; a term earns points for each query token it starts with and, with an
        // increasing bonus factor, for each cumulative space-joined token sequence it starts with.
        // Returns true when the score is above 0.
        bool Match(IReadOnlyList<string> tokenizedQuery, string itemPath, out float score)
        {
            // NOTE(review): the lookup below uses the trimmed path, while BuildIndex keys the
            // index by the untrimmed item.Path, so a path with leading or trailing whitespace
            // is never found here - confirm whether that is intended.
            itemPath = itemPath.Trim();
            if (itemPath == "")
            {
                // An empty path only matches an empty token list.
                if (tokenizedQuery.Count == 0)
                {
                    score = 1;
                    return true;
                }

                score = 0;
                return false;
            }

            IReadOnlyList<ValueTuple<string, float>> indexTerms;
            if (!m_Index.TryGetValue(itemPath, out indexTerms))
            {
                score = 0;
                return false;
            }

            float maxScore = 0.0f;
            foreach (var t in indexTerms)
            {
                float scoreForTerm = 0f;
                var querySuite = "";
                var querySuiteFactor = 1.25f;

                foreach (var q in tokenizedQuery)
                {
                    // Ordinal comparison: both sides are already lower-cased invariantly.
                    if (t.Item1.StartsWith(q, StringComparison.Ordinal))
                    {
                        scoreForTerm += t.Item2 * q.Length / t.Item1.Length;
                    }

                    if (querySuite.Length > 0)
                    {
                        querySuite += " " + q;
                        if (t.Item1.StartsWith(querySuite, StringComparison.Ordinal))
                        {
                            scoreForTerm += t.Item2 * querySuiteFactor * querySuite.Length / t.Item1.Length;
                        }
                    }
                    else
                    {
                        querySuite = q;
                    }

                    // The factor is squared after every token, so longer sequences weigh more.
                    querySuiteFactor *= querySuiteFactor;
                }

                maxScore = Mathf.Max(maxScore, scoreForTerm);
            }

            score = maxScore;
            return score > 0;
        }
    }
}