Q

byte [] array pattern search

2008-11-12 4 views 58 likes

58

Кто-нибудь знает хороший и эффективный способ поиска/сопоставления байтового шаблона в массиве byte [], а затем возвращает позиции.byte [] array pattern search

Например

byte[] pattern = new byte[] {12,3,5,76,8,0,6,125}; 

byte[] toBeSearched = new byte[] {23,36,43,76,125,56,34,234,12,3,5,76,8,0,6,125,234,56,211,122,22,4,7,89,76,64,12,3,5,76,8,0,6,125}

источник

2008-11-12 Anders R

A

ответ

-1

Вы можете поместить массив байтов в String и запустить матч по IndexOf. Или вы можете хотя бы повторно использовать existing algorithms на соответствие строк.

[STAThread] 
    static void Main(string[] args) 
    { 
     byte[] pattern = new byte[] {12,3,5,76,8,0,6,125}; 
     byte[] toBeSearched = new byte[] {23,36,43,76,125,56,34,234,12,3,5,76,8,0,6,125,234,56,211,122,22,4,7,89,76,64,12,3,5,76,8,0,6,125}; 
     string needle, haystack; 

     unsafe 
     { 
      fixed(byte * p = pattern) { 
       needle = new string((SByte *) p, 0, pattern.Length); 
      } // fixed 

      fixed (byte * p2 = toBeSearched) 
      { 
       haystack = new string((SByte *) p2, 0, toBeSearched.Length); 
      } // fixed 

      int i = haystack.IndexOf(needle, 0); 
      System.Console.Out.WriteLine(i); 
     } 
    }

источник

2008-11-12 10:00:24

+0

Вы пробовали? Давайте посмотрим код! – 2008-11-12 10:05:07

+0

Хммм, не уверен в необходимости небезопасности! кажется немного резким ... – 2008-11-12 10:24:58

+0

ваш код только окунает первое появление, но вопрос подразумевает все совпадения ... – 2008-11-12 10:26:06

-3

toBeSearched.Except (шаблон) возвратит вам разницу toBeSearched.Intersect (шаблон) будет производить множество пересечений Как правило, вы должны смотреть в расширенные методы в расширениях Linq

источник

2008-11-12 10:22:58 Tamir

7

Мое решение:

class Program 
{ 
    public static void Main() 
    { 
     byte[] pattern = new byte[] {12,3,5,76,8,0,6,125}; 

     byte[] toBeSearched = new byte[] { 23, 36, 43, 76, 125, 56, 34, 234, 12, 3, 5, 76, 8, 0, 6, 125, 234, 56, 211, 122, 22, 4, 7, 89, 76, 64, 12, 3, 5, 76, 8, 0, 6, 125}; 

     List<int> positions = SearchBytePattern(pattern, toBeSearched); 

     foreach (var item in positions) 
     { 
      Console.WriteLine("Pattern matched at pos {0}", item); 
     } 

    } 

    static public List<int> SearchBytePattern(byte[] pattern, byte[] bytes) 
    { 
     List<int> positions = new List<int>(); 
     int patternLength = pattern.Length; 
     int totalLength = bytes.Length; 
     byte firstMatchByte = pattern[0]; 
     for (int i = 0; i < totalLength; i++) 
     { 
      if (firstMatchByte == bytes[i] && totalLength - i >= patternLength) 
      { 
       byte[] match = new byte[patternLength]; 
       Array.Copy(bytes, i, match, 0, patternLength); 
       if (match.SequenceEqual<byte>(pattern)) 
       { 
        positions.Add(i); 
        i += patternLength - 1; 
       } 
      } 
     } 
     return positions; 
    } 
}

источник

2008-11-12 10:55:03

+1

почему array.copy? просто становится медленнее таким образом. Я предполагаю, что это просто потому, что вы хотите использовать SequenceEqual, но это может немного поработать, потому что вы хотите использовать метод расширения. «I + = patternLength - 1;» часть хорошая! – 2008-11-12 13:24:29

+3

Вы не должны давать -1 всем, потому что решение не идеально ... В этой ситуации вы должны проголосовать только за решение, которое, по вашему мнению, является лучшим. – 2008-11-12 14:24:14

+0

Не промахиваются ли эти совпадения? (например, BOB будет найден только один раз в BOBOB) – Jeff 2012-02-28 20:25:36

44

Могу ли я предложить что-то, что не требует создания строк, копирование массивов или небезопасный код:

using System; 
using System.Collections.Generic; 

static class ByteArrayRocks { 

    static readonly int [] Empty = new int [0]; 

    public static int [] Locate (this byte [] self, byte [] candidate) 
    { 
     if (IsEmptyLocate (self, candidate)) 
      return Empty; 

     var list = new List<int>(); 

     for (int i = 0; i < self.Length; i++) { 
      if (!IsMatch (self, i, candidate)) 
       continue; 

      list.Add (i); 
     } 

     return list.Count == 0 ? Empty : list.ToArray(); 
    } 

    static bool IsMatch (byte [] array, int position, byte [] candidate) 
    { 
     if (candidate.Length > (array.Length - position)) 
      return false; 

     for (int i = 0; i < candidate.Length; i++) 
      if (array [position + i] != candidate [i]) 
       return false; 

     return true; 
    } 

    static bool IsEmptyLocate (byte [] array, byte [] candidate) 
    { 
     return array == null 
      || candidate == null 
      || array.Length == 0 
      || candidate.Length == 0 
      || candidate.Length > array.Length; 
    } 

    static void Main() 
    { 
     var data = new byte [] { 23, 36, 43, 76, 125, 56, 34, 234, 12, 3, 5, 76, 8, 0, 6, 125, 234, 56, 211, 122, 22, 4, 7, 89, 76, 64, 12, 3, 5, 76, 8, 0, 6, 125 }; 
     var pattern = new byte [] { 12, 3, 5, 76, 8, 0, 6, 125 }; 

     foreach (var position in data.Locate (pattern)) 
      Console.WriteLine (position); 
    } 
}

Edit (по IAbstract) - движущемся содержание post здесь, так как это не ответ

Из любопытства, я создал небольшой тест с различными ответами.

Вот результаты за миллион итераций:

solution [Locate]:   00:00:00.7714027 
solution [FindAll]:   00:00:03.5404399 
solution [SearchBytePattern]: 00:00:01.1105190 
solution [MatchBytePattern]: 00:00:03.0658212

источник

2008-11-12 11:21:36

+3

Ваше решение медленное в массиве больших байтов. – Tomas 2011-06-30 11:25:36

0

Вот мой (не самый производительный) решение. Он полагается на то, что преобразование байт/латин-1 без потерь, которое равно , а не true для преобразований байтов/ASCII или байтов/UTF8.

Достоинствами являются то, что It Works (tm) для любых значений байтов (некоторые другие решения работают неправильно с байтами 0x80-0xff) и могут быть расширены для выполнения более совершенного регулярного выражения .

using System; 
using System.Collections.Generic; 
using System.Text; 
using System.Text.RegularExpressions; 

class C { 

    public static void Main() { 
    byte[] data = {0, 100, 0, 255, 100, 0, 100, 0, 255}; 
    byte[] pattern = {0, 255}; 
    foreach (int i in FindAll(data, pattern)) { 
     Console.WriteLine(i); 
    } 
    } 

    public static IEnumerable<int> FindAll(
    byte[] haystack, 
    byte[] needle 
) { 
    // bytes <-> latin-1 conversion is lossless 
    Encoding latin1 = Encoding.GetEncoding("iso-8859-1"); 
    string sHaystack = latin1.GetString(haystack); 
    string sNeedle = latin1.GetString(needle); 
    for (Match m = Regex.Match(sHaystack, Regex.Escape(sNeedle)); 
     m.Success; m = m.NextMatch()) { 
     yield return m.Index; 
    } 
    } 
}

источник

2008-11-12 11:27:43 Constantin

1

Я хотел бы использовать решение, которое делает соответствующий путем преобразования в строку ...

Вы должны написать простую функцию, реализующую Knuth-Morris-Pratt searching algorithm. Это будет самый быстрый простой алгоритм, который вы можете использовать для поиска правильных индексов. (Вы можете использовать Boyer-Moore, но для этого потребуется больше настроек.

После того как вы оптимизировали алгоритм, вы можете попытаться найти другие виды оптимизации. Но вы должны начать с основ.

Например, текущая «быстрый» является Расположить решение по Jb Эвиане.

если посмотреть на ядро

for (int i = 0; i < self.Length; i++) { 
      if (!IsMatch (self, i, candidate)) 
        continue; 

      list.Add (i); 
    }

После матча из под-алгоритм, он начнет находить совпадение в i + 1, но вы уже знаете, что первым возможным совпадением будет i + кандидат. Длина.Так что, если вы добавите,

i += candidate.Length -2; // -2 instead of -1 because the i++ will add the last index

это будет намного быстрее, когда вы ожидаете много вхождений подмножества в надмножестве. (Бруно Конде уже делает это в своем решении)

Но это всего лишь половина алгоритма KNP, вы также должны добавить дополнительный параметр к методу IsMatch, называемому numberOfValidMatches, который будет параметром out.

это разрешило бы к следующему:

int validMatches = 0; 
if (!IsMatch (self, i, candidate, out validMatches)) 
{ 
    i += validMatches - 1; // -1 because the i++ will do the last one 
    continue; 
}

и

static bool IsMatch (byte [] array, int position, byte [] candidate, out int numberOfValidMatches) 
{ 
    numberOfValidMatches = 0; 
    if (candidate.Length > (array.Length - position)) 
      return false; 

    for (i = 0; i < candidate.Length; i++) 
    { 
      if (array [position + i] != candidate [i]) 
        return false; 
      numberOfValidMatches++; 
    } 

    return true; 
}

немного рефакторинга, и вы можете использовать numberOfValidMatches в качестве переменной цикла, и переписать Расположить петлю, используя какое-то время чтобы избежать -2 и -1. Но я просто хотел пояснить, как вы можете добавить алгоритм KMP.

источник

2008-11-12 13:14:49

12

Используйте эффективный Boyer-Moore algorithm.

Он предназначен для поиска строк со строками, но вам нужно мало воображения, чтобы проецировать это на байтовые массивы.

В общем, лучший ответ: используйте любой алгоритм поиска строк, который вам нравится :).

источник

2008-11-12 13:21:25 VVS

1

ответ Jb Evain имеет:

for (int i = 0; i < self.Length; i++) { 
     if (!IsMatch (self, i, candidate)) 
      continue; 
     list.Add (i); 
}

, а затем функция IsMatch сначала проверяет, идет ли candidate за пределы длины массива ищется.

Это было бы более эффективным, если петля for были закодированы:

for (int i = 0; i < self.Length - candidate.Length; i++) { 
     if (!IsMatch (self, i, candidate)) 
      continue; 
     list.Add (i); 
}

в этот момент один может также устранить тест с начала IsMatch, до тех пор пока вы не сжиматься с помощью предварительных условий никогда назвать его «незаконными» параметрами.

источник

2008-11-12 13:34:51 Alnitak

1

Я создал новую функцию, используя подсказки от моего ответа и ответ на Альнитаке.

public static List<Int32> LocateSubset(Byte[] superSet, Byte[] subSet) 
{ 
    if ((superSet == null) || (subSet == null)) 
    { 
     throw new ArgumentNullException(); 
    } 
    if ((superSet.Length < subSet.Length) || (superSet.Length == 0) || (subSet.Length == 0)) 
    { 
     return new List<Int32>(); 
    } 
    var result = new List<Int32>(); 
    Int32 currentIndex = 0; 
    Int32 maxIndex = superSet.Length - subSet.Length; 
    while (currentIndex < maxIndex) 
    { 
     Int32 matchCount = CountMatches(superSet, currentIndex, subSet); 
     if (matchCount == subSet.Length) 
     { 
      result.Add(currentIndex); 
     } 
     currentIndex++; 
     if (matchCount > 0) 
     { 
      currentIndex += matchCount - 1; 
     } 
    } 
    return result; 
} 

private static Int32 CountMatches(Byte[] superSet, int startIndex, Byte[] subSet) 
{ 
    Int32 currentOffset = 0; 
    while (currentOffset < subSet.Length) 
    { 
     if (superSet[startIndex + currentOffset] != subSet[currentOffset]) 
     { 
      break; 
     } 
     currentOffset++; 
    } 
    return currentOffset; 
}

только часть я не так доволен является

  currentIndex++; 
     if (matchCount > 0) 
     { 
      currentIndex += matchCount - 1; 
     }

часть ... Я бы, как использовать, если еще, чтобы избежать -1, но это приводит к предсказанию лучше ветви (хотя я не уверен, что это будет иметь большое значение).

источник

2008-11-12 14:05:54

1

Зачем делать простые сложные? Это можно сделать на любом языке, использующем для циклов. Вот один в C#:

 
using System; 
using System.Collections.Generic; 

namespace BinarySearch 
{ 
    class Program 
    { 
     static void Main(string[] args) 
     { 
      byte[] pattern = new byte[] {12,3,5,76,8,0,6,125}; 
      byte[] toBeSearched = new byte[] {23,36,43,76,125,56,34,234,12,3,5,76,8,0,6,125,234,56,211,
122,22,4,7,89,76,64,12,3,5,76,8,0,6,125}; 

      List<int> occurences = findOccurences(toBeSearched, pattern); 

      foreach(int occurence in occurences) { 
       Console.WriteLine("Found match starting at 0-based index: " + occurence); 
      } 


     } 

     static List<int> findOccurences(byte[] haystack, byte[] needle) 
     { 
      List<int> occurences = new List<int>(); 

      for (int i = 0; i < haystack.Length; i++) 
      { 
       if (needle[0] == haystack[i]) 
       { 
        bool found = true; 
        int j, k; 
        for (j = 0, k = i; j < needle.Length; j++, k++) 
        { 
         if (k >= haystack.Length || needle[j] != haystack[k]) 
         { 
          found = false; 
          break; 
         } 
        } 
        if (found) 
        { 
         occurences.Add(i - 1); 
         i = k; 
        } 
       } 
      } 
      return occurences; 
     } 
    } 
}

источник

2008-11-12 14:31:30

1

спасибо, что нашли время ...

Это код, который я использовал/тестирование с прежде чем я спросил мой вопрос ... Поэтому я задаю этот вопрос был в том, что я уверен, что я не использую оптимальный код для этого ... так что еще раз спасибо за то, что нашли время!

private static int CountPatternMatches(byte[] pattern, byte[] bytes) 
    { 
     int counter = 0; 

     for (int i = 0; i < bytes.Length; i++) 
     { 
      if (bytes[i] == pattern[0] && (i + pattern.Length) < bytes.Length) 
      { 
       for (int x = 1; x < pattern.Length; x++) 
       { 
        if (pattern[x] != bytes[x+i]) 
        { 
         break; 
        } 

        if (x == pattern.Length -1) 
        { 
         counter++; 
         i = i + pattern.Length; 
        } 
       } 
      } 
     } 

     return counter; 
    }

Любой, кто видит ошибки в моем коде? Это считается хакерским подходом? Я пробовал почти каждый образец, который вы, ребята, разместили, и я, кажется, получаю некоторые изменения в результатах матчей. Я выполнял свои тесты с массивом байтов размером ~ 10 Мб в качестве массива toBeSearched.

источник

2008-11-12 14:44:18

11

Первоначально я отправил некоторый старый код, который я использовал, но мне было любопытно, что benchmarks Jb Evain. Я обнаружил, что мое решение было глупо медленным. Похоже, что buno conde's SearchBytePattern является самым быстрым. Я не мог понять, почему, особенно потому, что он использует метод Array.Copy и Extension. Но в тестах Jb есть доказательство, так что, к счастью, к bruno.

Я упростил бит еще больше, поэтому, надеюсь, это будет самое ясное и простое решение. (Вся тяжелая работа, выполняемая BRUNO Conde) Улучшения являются:

Buffer.BlockCopy
Array.indexOf < байт >
во время цикла вместо петли для
параметра индекс начала

преобразуется в метод расширения

public static List<int> IndexOfSequence(this byte[] buffer, byte[] pattern, int startIndex)  
{ 
    List<int> positions = new List<int>(); 
    int i = Array.IndexOf<byte>(buffer, pattern[0], startIndex); 
    while (i >= 0 && i <= buffer.Length - pattern.Length) 
    { 
     byte[] segment = new byte[pattern.Length]; 
     Buffer.BlockCopy(buffer, i, segment, 0, pattern.Length);  
     if (segment.SequenceEqual<byte>(pattern)) 
      positions.Add(i); 
     i = Array.IndexOf<byte>(buffer, pattern[0], i + pattern.Length); 
    } 
    return positions;  
}

источник

2008-12-02 00:18:54 GoClimbColorado

1

Скорость еще не все. Вы проверяли их на согласованность?

Я не тестировал весь код, указанный здесь. Я протестировал свой собственный код (который не был полностью согласован, я допускаю) и IndexOfSequence. Я обнаружил, что для многих тестов IndexOfSequence был довольно быстрым, чем мой код, но с повторным тестированием я обнаружил, что он был менее последовательным. В частности, у него, похоже, больше всего проблем с поиском шаблонов в конце массива, но иногда они будут пропустить их в середине массива.

Мой тестовый код не предназначен для повышения эффективности, я просто хотел иметь кучу случайных данных с некоторыми известными строками внутри. Этот тестовый шаблон примерно похож на пограничный маркер в потоке загрузки http-формы. Это то, что я искал, когда я сталкивался с этим кодом, поэтому решил, что проверю его на данные, которые я буду искать. Похоже, что чем длиннее шаблон, тем чаще IndexOfSequence пропустит значение.

private static void TestMethod() 
{ 
    Random rnd = new Random(DateTime.Now.Millisecond); 
    string Pattern = "-------------------------------65498495198498"; 
    byte[] pattern = Encoding.ASCII.GetBytes(Pattern); 

    byte[] testBytes; 
    int count = 3; 
    for (int i = 0; i < 100; i++) 
    { 
     StringBuilder TestString = new StringBuilder(2500); 
     TestString.Append(Pattern); 
     byte[] buf = new byte[1000]; 
     rnd.NextBytes(buf); 
     TestString.Append(Encoding.ASCII.GetString(buf)); 
     TestString.Append(Pattern); 
     rnd.NextBytes(buf); 
     TestString.Append(Encoding.ASCII.GetString(buf)); 
     TestString.Append(Pattern); 
     testBytes = Encoding.ASCII.GetBytes(TestString.ToString()); 

     List<int> idx = IndexOfSequence(ref testBytes, pattern, 0); 
     if (idx.Count != count) 
     { 
      Console.Write("change from {0} to {1} on iteration {2}: ", count, idx.Count, i); 
      foreach (int ix in idx) 
      { 
       Console.Write("{0}, ", ix); 
      } 
      Console.WriteLine(); 
      count = idx.Count; 
     } 
    } 

    Console.WriteLine("Press ENTER to exit"); 
    Console.ReadLine(); 
}

(очевидно, я преобразовал IndexOfSequence от расширения обратно в нормальный метод для этого испытания)

Вот пример работы моего выхода:

change from 3 to 2 on iteration 1: 0, 2090, 
change from 2 to 3 on iteration 2: 0, 1045, 2090, 
change from 3 to 2 on iteration 3: 0, 1045, 
change from 2 to 3 on iteration 4: 0, 1045, 2090, 
change from 3 to 2 on iteration 6: 0, 2090, 
change from 2 to 3 on iteration 7: 0, 1045, 2090, 
change from 3 to 2 on iteration 11: 0, 2090, 
change from 2 to 3 on iteration 12: 0, 1045, 2090, 
change from 3 to 2 on iteration 14: 0, 2090, 
change from 2 to 3 on iteration 16: 0, 1045, 2090, 
change from 3 to 2 on iteration 17: 0, 1045, 
change from 2 to 3 on iteration 18: 0, 1045, 2090, 
change from 3 to 1 on iteration 20: 0, 
change from 1 to 3 on iteration 21: 0, 1045, 2090, 
change from 3 to 2 on iteration 22: 0, 2090, 
change from 2 to 3 on iteration 23: 0, 1045, 2090, 
change from 3 to 2 on iteration 24: 0, 2090, 
change from 2 to 3 on iteration 25: 0, 1045, 2090, 
change from 3 to 2 on iteration 26: 0, 2090, 
change from 2 to 3 on iteration 27: 0, 1045, 2090, 
change from 3 to 2 on iteration 43: 0, 1045, 
change from 2 to 3 on iteration 44: 0, 1045, 2090, 
change from 3 to 2 on iteration 48: 0, 1045, 
change from 2 to 3 on iteration 49: 0, 1045, 2090, 
change from 3 to 2 on iteration 50: 0, 2090, 
change from 2 to 3 on iteration 52: 0, 1045, 2090, 
change from 3 to 2 on iteration 54: 0, 1045, 
change from 2 to 3 on iteration 57: 0, 1045, 2090, 
change from 3 to 2 on iteration 62: 0, 1045, 
change from 2 to 3 on iteration 63: 0, 1045, 2090, 
change from 3 to 2 on iteration 72: 0, 2090, 
change from 2 to 3 on iteration 73: 0, 1045, 2090, 
change from 3 to 2 on iteration 75: 0, 2090, 
change from 2 to 3 on iteration 76: 0, 1045, 2090, 
change from 3 to 2 on iteration 78: 0, 1045, 
change from 2 to 3 on iteration 79: 0, 1045, 2090, 
change from 3 to 2 on iteration 81: 0, 2090, 
change from 2 to 3 on iteration 82: 0, 1045, 2090, 
change from 3 to 2 on iteration 85: 0, 2090, 
change from 2 to 3 on iteration 86: 0, 1045, 2090, 
change from 3 to 2 on iteration 89: 0, 2090, 
change from 2 to 3 on iteration 90: 0, 1045, 2090, 
change from 3 to 2 on iteration 91: 0, 2090, 
change from 2 to 1 on iteration 92: 0, 
change from 1 to 3 on iteration 93: 0, 1045, 2090, 
change from 3 to 1 on iteration 99: 0,

Я не имею в виду, чтобы выбрать на IndexOfSequence, именно так я и начал работать сегодня. В конце дня я заметил, что, по-видимому, в данных отсутствуют шаблоны, поэтому сегодня я написал свой собственный шаблон. Однако это не так быстро. Я собираюсь настроить его немного больше, чтобы увидеть, смогу ли я получить его на 100%, прежде чем опубликовать его.

Я просто хотел напомнить всем, что они должны протестировать такие вещи, чтобы убедиться, что они дают хорошие, повторяемые результаты, прежде чем доверять им производственный код.

источник

2009-08-13 09:51:37

1

Я пробовал различные решения и в конечном итоге модифицировал SearchBytePattern.Я тестировал на 30-килограммовой последовательности, и это быстро :)

static public int SearchBytePattern(byte[] pattern, byte[] bytes) 
    { 
     int matches = 0; 
     for (int i = 0; i < bytes.Length; i++) 
     { 
      if (pattern[0] == bytes[i] && bytes.Length - i >= pattern.Length) 
      { 
       bool ismatch = true; 
       for (int j = 1; j < pattern.Length && ismatch == true; j++) 
       { 
        if (bytes[i + j] != pattern[j]) 
         ismatch = false; 
       } 
       if (ismatch) 
       { 
        matches++; 
        i += pattern.Length - 1; 
       } 
      } 
     } 
     return matches; 
    }

Сообщите мне свои мысли.

источник

2009-11-27 09:46:03

2

Это самые простые и быстрые методы, которые вы можете использовать, и не было бы ничего более быстрого, чем эти. Это небезопасно, но для этого мы используем указатели для скорости. Поэтому здесь я предлагаю вам свои методы расширения, которые я использую для поиска одиночного, и список индексов событий. Я бы хотел сказать, что здесь самый чистый код.

public static unsafe long IndexOf(this byte[] Haystack, byte[] Needle) 
    { 
     fixed (byte* H = Haystack) fixed (byte* N = Needle) 
     { 
      long i = 0; 
      for (byte* hNext = H, hEnd = H + Haystack.LongLength; hNext < hEnd; i++, hNext++) 
      { 
       bool Found = true; 
       for (byte* hInc = hNext, nInc = N, nEnd = N + Needle.LongLength; Found && nInc < nEnd; Found = *nInc == *hInc, nInc++, hInc++) ; 
       if (Found) return i; 
      } 
      return -1; 
     } 
    } 
    public static unsafe List<long> IndexesOf(this byte[] Haystack, byte[] Needle) 
    { 
     List<long> Indexes = new List<long>(); 
     fixed (byte* H = Haystack) fixed (byte* N = Needle) 
     { 
      long i = 0; 
      for (byte* hNext = H, hEnd = H + Haystack.LongLength; hNext < hEnd; i++, hNext++) 
      { 
       bool Found = true; 
       for (byte* hInc = hNext, nInc = N, nEnd = N + Needle.LongLength; Found && nInc < nEnd; Found = *nInc == *hInc, nInc++, hInc++) ; 
       if (Found) Indexes.Add(i); 
      } 
      return Indexes; 
     } 
    }

протестированные с Locate, это 1.2-1.4x быстрее

источник

2011-03-08 00:55:05 Foubar

1

Вот решение, которое я придумал. Я включил заметки, которые я нашел на этом пути с реализацией. Он может совпадать вперед, назад и с различными (в/де) суммами, например, направление; начиная с любого смещения в стоге сена.

Любой вход был бы потрясающим!

/// <summary> 
    /// Matches a byte array to another byte array 
    /// forwards or reverse 
    /// </summary> 
    /// <param name="a">byte array</param> 
    /// <param name="offset">start offset</param> 
    /// <param name="len">max length</param> 
    /// <param name="b">byte array</param> 
    /// <param name="direction">to move each iteration</param> 
    /// <returns>true if all bytes match, otherwise false</returns> 
    internal static bool Matches(ref byte[] a, int offset, int len, ref byte[] b, int direction = 1) 
    { 
     #region Only Matched from offset Within a and b, could not differ, e.g. if you wanted to mach in reverse for only part of a in some of b that would not work 
     //if (direction == 0) throw new ArgumentException("direction"); 
     //for (; offset < len; offset += direction) if (a[offset] != b[offset]) return false; 
     //return true; 
     #endregion 
     //Will match if b contains len of a and return a a index of positive value 
     return IndexOfBytes(ref a, ref offset, len, ref b, len) != -1; 
    } 

///Here is the Implementation code 

    /// <summary> 
    /// Swaps two integers without using a temporary variable 
    /// </summary> 
    /// <param name="a"></param> 
    /// <param name="b"></param> 
    internal static void Swap(ref int a, ref int b) 
    { 
     a ^= b; 
     b ^= a; 
     a ^= b; 
    } 

    /// <summary> 
    /// Swaps two bytes without using a temporary variable 
    /// </summary> 
    /// <param name="a"></param> 
    /// <param name="b"></param> 
    internal static void Swap(ref byte a, ref byte b) 
    { 
     a ^= b; 
     b ^= a; 
     a ^= b; 
    } 

    /// <summary> 
    /// Can be used to find if a array starts, ends spot Matches or compltely contains a sub byte array 
    /// Set checkLength to the amount of bytes from the needle you want to match, start at 0 for forward searches start at hayStack.Lenght -1 for reverse matches 
    /// </summary> 
    /// <param name="a">Needle</param> 
    /// <param name="offset">Start in Haystack</param> 
    /// <param name="len">Length of required match</param> 
    /// <param name="b">Haystack</param> 
    /// <param name="direction">Which way to move the iterator</param> 
    /// <returns>Index if found, otherwise -1</returns> 
    internal static int IndexOfBytes(ref byte[] needle, ref int offset, int checkLength, ref byte[] haystack, int direction = 1) 
    { 
     //If the direction is == 0 we would spin forever making no progress 
     if (direction == 0) throw new ArgumentException("direction"); 
     //Cache the length of the needle and the haystack, setup the endIndex for a reverse search 
     int needleLength = needle.Length, haystackLength = haystack.Length, endIndex = 0, workingOffset = offset; 
     //Allocate a value for the endIndex and workingOffset 
     //If we are going forward then the bound is the haystackLength 
     if (direction >= 1) endIndex = haystackLength; 
     #region [Optomization - Not Required] 
     //{ 

      //I though this was required for partial matching but it seems it is not needed in this form 
      //workingOffset = needleLength - checkLength; 
     //} 
     #endregion 
     else Swap(ref workingOffset, ref endIndex);     
     #region [Optomization - Not Required] 
     //{ 
      //Otherwise we are going in reverse and the endIndex is the needleLength - checkLength     
      //I though the length had to be adjusted but it seems it is not needed in this form 
      //endIndex = needleLength - checkLength; 
     //} 
     #endregion 
     #region [Optomized to above] 
     //Allocate a value for the endIndex 
     //endIndex = direction >= 1 ? haystackLength : needleLength - checkLength, 
     //Determine the workingOffset 
     //workingOffset = offset > needleLength ? offset : needleLength;    
     //If we are doing in reverse swap the two 
     //if (workingOffset > endIndex) Swap(ref workingOffset, ref endIndex); 
     //Else we are going in forward direction do the offset is adjusted by the length of the check 
     //else workingOffset -= checkLength; 
     //Start at the checkIndex (workingOffset) every search attempt 
     #endregion 
     //Save the checkIndex (used after the for loop is done with it to determine if the match was checkLength long) 
     int checkIndex = workingOffset; 
     #region [For Loop Version] 
     ///Optomized with while (single op) 
     ///for (int checkIndex = workingOffset; checkIndex < endIndex; offset += direction, checkIndex = workingOffset) 
      ///{ 
       ///Start at the checkIndex 
       /// While the checkIndex < checkLength move forward 
       /// If NOT (the needle at the checkIndex matched the haystack at the offset + checkIndex) BREAK ELSE we have a match continue the search     
       /// for (; checkIndex < checkLength; ++checkIndex) if (needle[checkIndex] != haystack[offset + checkIndex]) break; else continue; 
       /// If the match was the length of the check 
       /// if (checkIndex == checkLength) return offset; //We are done matching 
      ///} 
     #endregion 
     //While the checkIndex < endIndex 
     while (checkIndex < endIndex) 
     { 
      for (; checkIndex < checkLength; ++checkIndex) if (needle[checkIndex] != haystack[offset + checkIndex]) break; else continue; 
      //If the match was the length of the check 
      if (checkIndex == checkLength) return offset; //We are done matching 
      //Move the offset by the direction, reset the checkIndex to the workingOffset 
      offset += direction; checkIndex = workingOffset;     
     } 
     //We did not have a match with the given options 
     return -1; 
    }

источник

2011-07-04 07:10:33 Jay

0

Я немного опоздал на вечеринку Как насчет использования алгоритма Бойера Мура, но искать байты вместо строк. C# код ниже.

EyeCode Inc.

class Program { 
    static void Main(string[] args) { 
     byte[] text   = new byte[] {12,3,5,76,8,0,6,125,23,36,43,76,125,56,34,234,12,4,5,76,8,0,6,125,234,56,211,122,22,4,7,89,76,64,12,3,5,76,8,0,6,123}; 
     byte[] pattern  = new byte[] {12,3,5,76,8,0,6,125}; 

     BoyerMoore tmpSearch = new BoyerMoore(pattern,text); 

     Console.WriteLine(tmpSearch.Match()); 
     Console.ReadKey(); 
    } 

    public class BoyerMoore { 

     private static int ALPHABET_SIZE = 256; 

     private byte[] text; 
     private byte[] pattern; 

     private int[] last; 
     private int[] match; 
     private int[] suffix; 

     public BoyerMoore(byte[] pattern, byte[] text) { 
      this.text = text; 
      this.pattern = pattern; 
      last = new int[ALPHABET_SIZE]; 
      match = new int[pattern.Length]; 
      suffix = new int[pattern.Length]; 
     } 


     /** 
     * Searches the pattern in the text. 
     * returns the position of the first occurrence, if found and -1 otherwise. 
     */ 
     public int Match() { 
      // Preprocessing 
      ComputeLast(); 
      ComputeMatch(); 

      // Searching 
      int i = pattern.Length - 1; 
      int j = pattern.Length - 1;  
      while (i < text.Length) { 
       if (pattern[j] == text[i]) { 
        if (j == 0) { 
         return i; 
        } 
        j--; 
        i--; 
       } 
       else { 
        i += pattern.Length - j - 1 + Math.Max(j - last[text[i]], match[j]); 
        j = pattern.Length - 1; 
       } 
      } 
      return -1;  
      } 


     /** 
     * Computes the function last and stores its values in the array last. 
     * last(Char ch) = the index of the right-most occurrence of the character ch 
     *               in the pattern; 
     *     -1 if ch does not occur in the pattern. 
     */ 
     private void ComputeLast() { 
      for (int k = 0; k < last.Length; k++) { 
       last[k] = -1; 
      } 
      for (int j = pattern.Length-1; j >= 0; j--) { 
       if (last[pattern[j]] < 0) { 
        last[pattern[j]] = j; 
       } 
      } 
     } 


     /** 
     * Computes the function match and stores its values in the array match. 
     * match(j) = min{ s | 0 < s <= j && p[j-s]!=p[j] 
     *       && p[j-s+1]..p[m-s-1] is suffix of p[j+1]..p[m-1] }, 
     *               if such s exists, else 
     *   min{ s | j+1 <= s <= m 
     *       && p[0]..p[m-s-1] is suffix of p[j+1]..p[m-1] }, 
     *               if such s exists, 
     *   m, otherwise, 
     * where p is the pattern and m is its length. 
     */ 
     private void ComputeMatch() { 
      /* Phase 1 */ 
      for (int j = 0; j < match.Length; j++) { 
       match[j] = match.Length; 
      } //O(m) 

      ComputeSuffix(); //O(m) 

      /* Phase 2 */ 
      //Uses an auxiliary array, backwards version of the KMP failure function. 
      //suffix[i] = the smallest j > i s.t. p[j..m-1] is a prefix of p[i..m-1], 
      //if there is no such j, suffix[i] = m 

      //Compute the smallest shift s, such that 0 < s <= j and 
      //p[j-s]!=p[j] and p[j-s+1..m-s-1] is suffix of p[j+1..m-1] or j == m-1}, 
      //               if such s exists, 
      for (int i = 0; i < match.Length - 1; i++) { 
       int j = suffix[i + 1] - 1; // suffix[i+1] <= suffix[i] + 1 
       if (suffix[i] > j) { // therefore pattern[i] != pattern[j] 
        match[j] = j - i; 
       } 
       else {// j == suffix[i] 
        match[j] = Math.Min(j - i + match[i], match[j]); 
       } 
      } 

      /* Phase 3 */ 
      //Uses the suffix array to compute each shift s such that 
      //p[0..m-s-1] is a suffix of p[j+1..m-1] with j < s < m 
      //and stores the minimum of this shift and the previously computed one. 
      if (suffix[0] < pattern.Length) { 
       for (int j = suffix[0] - 1; j >= 0; j--) { 
        if (suffix[0] < match[j]) { match[j] = suffix[0]; } 
       } 
       { 
        int j = suffix[0]; 
        for (int k = suffix[j]; k < pattern.Length; k = suffix[k]) { 
         while (j < k) { 
          if (match[j] > k) { 
           match[j] = k; 
          } 
          j++; 
         } 
        } 
       } 
      } 
     } 


     /** 
     * Computes the values of suffix, which is an auxiliary array, 
     * backwards version of the KMP failure function. 
     * 
     * suffix[i] = the smallest j > i s.t. p[j..m-1] is a prefix of p[i..m-1], 
     * if there is no such j, suffix[i] = m, i.e. 

     * p[suffix[i]..m-1] is the longest prefix of p[i..m-1], if suffix[i] < m. 
     */ 
     private void ComputeSuffix() {   
      suffix[suffix.Length-1] = suffix.Length;    
      int j = suffix.Length - 1; 
      for (int i = suffix.Length - 2; i >= 0; i--) { 
       while (j < suffix.Length - 1 && !pattern[j].Equals(pattern[i])) { 
        j = suffix[j + 1] - 1; 
       } 
       if (pattern[j] == pattern[i]) { 
        j--; 
       } 
       suffix[i] = j + 1; 
      } 
     } 

    } 

}

источник

2011-08-06 03:15:37 Victor

0

Вот простой код, который я написал, используя только основные типов данных: (возвращает индекс первого вхождения)

private static int findMatch(byte[] data, byte[] pattern) { 
    if(pattern.length > data.length){ 
     return -1; 
    } 
    for(int i = 0; i<data.length ;){ 
     int j; 
     for(j=0;j<pattern.length;j++){ 

      if(pattern[j]!=data[i]) 
       break; 
      i++; 
     } 
     if(j==pattern.length){ 
      System.out.println("Pattern found at : "+(i - pattern.length)); 
      return i - pattern.length ; 
     } 
     if(j!=0)continue; 
     i++; 
    } 

    return -1; 
}

источник

2012-04-13 11:47:57 Ravi

4

Я отсутствующий метод LINQ/ответ :-)

/// <summary> 
/// Searches in the haystack array for the given needle using the default equality operator and returns the index at which the needle starts. 
/// </summary> 
/// <typeparam name="T">Type of the arrays.</typeparam> 
/// <param name="haystack">Sequence to operate on.</param> 
/// <param name="needle">Sequence to search for.</param> 
/// <returns>Index of the needle within the haystack or -1 if the needle isn't contained.</returns> 
public static IEnumerable<int> IndexOf<T>(this T[] haystack, T[] needle) 
{ 
    if ((needle != null) && (haystack.Length >= needle.Length)) 
    { 
     for (int l = 0; l < haystack.Length - needle.Length + 1; l++) 
     { 
      if (!needle.Where((data, index) => !haystack[l + index].Equals(data)).Any()) 
      { 
       yield return l; 
      } 
     } 
    } 
}

источник

2012-08-08 14:41:19 Matten

23

Использование Методы LINQ.

public static IEnumerable<int> PatternAt(byte[] source, byte[] pattern) 
{ 
    for (int i = 0; i < source.Length; i++) 
    { 
     if (source.Skip(i).Take(pattern.Length).SequenceEqual(pattern)) 
     { 
      yield return i; 
     } 
    } 
}

Очень простой!

источник

2013-02-05 16:28:52 YujiSoftware

0

Еще один ответ, который легко отслеживать и довольно эффективен для работы O (n) без использования небезопасного кода или копирования частей исходных массивов.

Обязательно проверьте. Некоторые из предложений, найденных в этой теме, восприимчивы к ситуациям.

static void Main(string[] args) 
    { 
     //               1 1 1 1 1 1 1 1 1 1 2 2 2 
     //       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 
     byte[] buffer = new byte[] { 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 5, 5, 0, 5, 5, 1, 2 }; 
     byte[] beginPattern = new byte[] { 1, 0, 2 }; 
     byte[] middlePattern = new byte[] { 8, 9, 10 }; 
     byte[] endPattern = new byte[] { 9, 10, 11 }; 
     byte[] wholePattern = new byte[] { 1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; 
     byte[] noMatchPattern = new byte[] { 7, 7, 7 }; 

     int beginIndex = ByteArrayPatternIndex(buffer, beginPattern); 
     int middleIndex = ByteArrayPatternIndex(buffer, middlePattern); 
     int endIndex = ByteArrayPatternIndex(buffer, endPattern); 
     int wholeIndex = ByteArrayPatternIndex(buffer, wholePattern); 
     int noMatchIndex = ByteArrayPatternIndex(buffer, noMatchPattern); 
    } 

    /// <summary> 
    /// Returns the index of the first occurrence of a byte array within another byte array 
    /// </summary> 
    /// <param name="buffer">The byte array to be searched</param> 
    /// <param name="pattern">The byte array that contains the pattern to be found</param> 
    /// <returns>If buffer contains pattern then the index of the first occurrence of pattern within buffer; otherwise, -1</returns> 
    public static int ByteArrayPatternIndex(byte[] buffer, byte[] pattern) 
    { 
     if (buffer != null && pattern != null && pattern.Length <= buffer.Length) 
     { 
      int resumeIndex; 
      for (int i = 0; i <= buffer.Length - pattern.Length; i++) 
      { 
       if (buffer[i] == pattern[0]) // Current byte equals first byte of pattern 
       { 
        resumeIndex = 0; 
        for (int x = 1; x < pattern.Length; x++) 
        { 
         if (buffer[i + x] == pattern[x]) 
         { 
          if (x == pattern.Length - 1) // Matched the entire pattern 
           return i; 
          else if (resumeIndex == 0 && buffer[i + x] == pattern[0]) // The current byte equals the first byte of the pattern so start here on the next outer loop iteration 
           resumeIndex = i + x; 
         } 
         else 
         { 
          if (resumeIndex > 0) 
           i = resumeIndex - 1; // The outer loop iterator will increment so subtract one 
          else if (x > 1) 
           i += (x - 1); // Advance the outer loop variable since we already checked these bytes 
          break; 
         } 
        } 
       } 
      } 
     } 
     return -1; 
    } 

    /// <summary> 
    /// Returns the indexes of each occurrence of a byte array within another byte array 
    /// </summary> 
    /// <param name="buffer">The byte array to be searched</param> 
    /// <param name="pattern">The byte array that contains the pattern to be found</param> 
    /// <returns>If buffer contains pattern then the indexes of the occurrences of pattern within buffer; otherwise, null</returns> 
    /// <remarks>A single byte in the buffer array can only be part of one match. For example, if searching for 1,2,1 in 1,2,1,2,1 only zero would be returned.</remarks> 
    public static int[] ByteArrayPatternIndex(byte[] buffer, byte[] pattern) 
    { 
     if (buffer != null && pattern != null && pattern.Length <= buffer.Length) 
     { 
      List<int> indexes = new List<int>(); 
      int resumeIndex; 
      for (int i = 0; i <= buffer.Length - pattern.Length; i++) 
      { 
       if (buffer[i] == pattern[0]) // Current byte equals first byte of pattern 
       { 
        resumeIndex = 0; 
        for (int x = 1; x < pattern.Length; x++) 
        { 
         if (buffer[i + x] == pattern[x]) 
         { 
          if (x == pattern.Length - 1) // Matched the entire pattern 
           indexes.Add(i); 
          else if (resumeIndex == 0 && buffer[i + x] == pattern[0]) // The current byte equals the first byte of the pattern so start here on the next outer loop iteration 
           resumeIndex = i + x; 
         } 
         else 
         { 
          if (resumeIndex > 0) 
           i = resumeIndex - 1; // The outer loop iterator will increment so subtract one 
          else if (x > 1) 
           i += (x - 1); // Advance the outer loop variable since we already checked these bytes 
          break; 
         } 
        } 
       } 
      } 
      if (indexes.Count > 0) 
       return indexes.ToArray(); 
     } 
     return null; 
    }

источник

2013-12-13 00:29:25 ApeShoes

3

Моя версия ответа Foubar в выше, что позволяет избежать ищущего в конце прошлого стоге сена, и позволяет задать начальное смещение. Предположим, что игла не пуста или длиннее стога сена.

public static unsafe long IndexOf(this byte[] haystack, byte[] needle, long startOffset = 0) 
{ 
    fixed (byte* h = haystack) fixed (byte* n = needle) 
    { 
     for (byte* hNext = h + startOffset, hEnd = h + haystack.LongLength + 1 - needle.LongLength, nEnd = n + needle.LongLength; hNext < hEnd; hNext++) 
      for (byte* hInc = hNext, nInc = n; *nInc == *hInc; hInc++) 
       if (++nInc == nEnd) 
        return hNext - h; 
     return -1; 
    } 
}

источник

2015-06-29 04:44:31

0

Вы можете использовать ORegex:

var oregex = new ORegex<byte>("{0}{1}{2}", x=> x==12, x=> x==3, x=> x==5); 
var toSearch = new byte[]{1,1,12,3,5,1,12,3,5,5,5,5}; 

var found = oregex.Matches(toSearch);

Будут найдены два матча:

i:2;l:3 
i:6;l:3

Сложность: O (Н * м), в худшем случае, в реальной жизни это O (n) из-за внутреннего конечного автомата. В некоторых случаях он быстрее, чем .NET Regex. Он компактный, быстрый и разработан специально для сопоставления шаблонов массива.

источник

2016-06-27 07:48:09 eocron

3

Это мой propossal, более простым и быстрым:

int Search(byte[] src, byte[] pattern) 
{ 
    int c = src.Length - pattern.Length + 1; 
    int j; 
    for (int i = 0; i < c; i++) 
    { 
     if (src[i] != pattern[0]) continue; 
     for (j = pattern.Length - 1; j >= 1 && src[i + j] == pattern[j]; j--) ; 
     if (j == 0) return i; 
    } 
    return -1; 
}

источник

2016-07-28 01:29:45

0

Я пытался понять предложение Санчеса и сделать производительность быстрее search.Below Кодекса почти equal.But код более понятным.

public int Search3(byte[] src, byte[] pattern) 
    { 
     int index = -1; 

     for (int i = 0; i < src.Length; i++) 
     { 
      if (src[i] != pattern[0]) 
      { 
       continue; 
      } 
      else 
      { 
       bool isContinoue = true; 
       for (int j = 1; j < pattern.Length; j++) 
       { 
        if (src[++i] != pattern[j]) 
        { 
         isContinoue = true; 
         break; 
        } 
        if(j == pattern.Length - 1) 
        { 
         isContinoue = false; 
        } 
       } 
       if (! isContinoue) 
       { 
        index = i-(pattern.Length-1) ; 
        break; 
       } 
      } 
     } 
     return index; 
    }

источник

2017-01-01 08:19:49 Mehmet