Cuente las substrings de longitud M que ocurren exactamente K veces en una string

Dada una string S de longitud N y dos enteros M y K , la tarea es contar el número de substrings de longitud M que ocurren exactamente K veces en la string S.

Ejemplos:

Entrada: S = “abacaba”, M = 3, K = 2
Salida: 1
Explicación: Todas las substrings distintas de longitud 3 son “aba”, “bac”, “aca”, “cab”.
De todas estas substrings, solo «aba» aparece dos veces en la string S.
Por lo tanto, el recuento es 1.

Entrada: S = «geeksforgeeks», M = 2, K = 1
Salida: 4
Explicación:
Todas las substrings distintas de longitud 2 son «ge», «ee», «ek», «ks», «sf», «fo» , “o”, “rg”.
De todas estas strings, «sf», «fo», «or», «rg» aparecen una vez en la string S.
Por lo tanto, la cuenta es 4.

Enfoque ingenuo: el enfoque más simple es generar todas las substrings de longitud M y almacenar la frecuencia de cada substring en la string S en un mapa . Ahora, recorra el Mapa y si la frecuencia es igual a K , incremente el conteo en 1 . Después de completar los pasos anteriores, imprima el recuento como resultado.
Complejidad de tiempo: O((N – M)*N*M)
Espacio auxiliar: O(N – M)

Enfoque eficiente: el enfoque anterior se puede optimizar utilizando el algoritmo KMP para encontrar la frecuencia de una substring en la string . Siga los pasos para resolver el problema:

Inicialice una variable, digamos contar como 0 , para almacenar el número de la substring requerida.
Genere todas las substrings de longitud M a partir de la string S e insértelas en una array , digamos arr[].
Atraviese la array arr[] y, para cada string de la array, calcule su frecuencia en la string S utilizando el algoritmo KMP .
Si la frecuencia de la string es igual a P , incremente el conteo en 1 .
Después de completar los pasos anteriores, imprima el valor de conteo como el conteo resultante de substrings.

A continuación se muestra la implementación del enfoque anterior:

C++

// C++ program for the above approach
#include <bits/stdc++.h>
using namespace std;
 
// Function to compute the LPS array
void computeLPSArray(string pat, int M,
                     int lps[])
{
    // Length of the previous
    // longest prefix suffix
    int len = 0;
    int i = 1;
    lps[0] = 0;
 
    // Iterate from [1, M - 1] to find lps[i]
    while (i < M) {
 
        // If the characters match
        if (pat[i] == pat[len]) {
 
            len++;
            lps[i] = len;
            i++;
        }
 
        // If pat[i] != pat[len]
        else {
 
            // If length is non-zero
            if (len != 0) {
                len = lps[len - 1];
 
                // Also, note that i is
                // not incremented here
            }
 
            // Otherwise
            else {
                lps[i] = len;
                i++;
            }
        }
    }
}
 
// Function to find the frequency of
// pat in the string txt
int KMPSearch(string pat, string txt)
{
    // Stores length of both strings
    int M = pat.length();
    int N = txt.length();
 
    // Initialize lps[] to store the
    // longest prefix suffix values
    // for the string pattern
    int lps[M];
 
    // Store the index for pat[]
    int j = 0;
 
    // Preprocess the pattern
    // (calculate lps[] array)
    computeLPSArray(pat, M, lps);
 
    // Store the index for txt[]
    int i = 0;
    int res = 0;
    int next_i = 0;
 
    while (i < N) {
        if (pat[j] == txt[i]) {
            j++;
            i++;
        }
        if (j == M) {
 
            // If pattern is found the
            // first time, iterate again
            // to check for more patterns
            j = lps[j - 1];
            res++;
 
            // Start i to check for more
            // than once occurrence
            // of pattern, reset i to
            // previous start + 1
            if (lps[j] != 0)
                i = ++next_i;
            j = 0;
        }
 
        // Mismatch after j matches
        else if (i < N
                 && pat[j] != txt[i]) {
 
            // Do not match lps[0..lps[j-1]]
            // characters, they will
            // match anyway
            if (j != 0)
                j = lps[j - 1];
            else
                i = i + 1;
        }
    }
 
    // Return the required frequency
    return res;
}
 
// Function to find count of substrings
// of length M occurring exactly P times
// in the string, S
void findCount(string& S, int M, int P)
{
 
    // Store all substrings of length M
    set<string> vec;
 
    // Store the size of the string, S
    int n = S.length();
 
    // Pick starting point
    for (int i = 0; i < n; i++) {
 
        // Pick ending point
        for (int len = 1;
             len <= n - i; len++) {
 
            // If the substring is of
            // length M, insert it in vec
            string s = S.substr(i, len);
            if (s.length() == M) {
                vec.insert(s);
            }
        }
    }
 
    // Initialise count as 0 to store
    // the required count of substrings
    int count = 0;
 
    // Iterate through the set of
    // substrings
    for (auto it : vec) {
 
        // Store its frequency
        int ans = KMPSearch(it, S);
 
        // If frequency is equal to P
        if (ans == P) {
 
            // Increment count by 1
            count++;
        }
    }
 
    // Print the answer
    cout << count;
}
 
// Driver Code
int main()
{
    string S = "abacaba";
    int M = 3, P = 2;
 
    // Function Call
    findCount(S, M, P);
 
    return 0;
}

Java

// Java Program to implement
// the above approach
 
import java.io.*;
import java.util.*;
 
class GFG {
 
    // Function to compute the LPS array
    static void computeLPSArray(String pat, int M,
                                int lps[])
    {
        // Length of the previous
        // longest prefix suffix
        int len = 0;
        int i = 1;
        lps[0] = 0;
 
        // Iterate from [1, M - 1] to find lps[i]
        while (i < M) {
 
            // If the characters match
            if (pat.charAt(i) == pat.charAt(len)) {
 
                len++;
                lps[i] = len;
                i++;
            }
 
            // If pat[i] != pat[len]
            else {
 
                // If length is non-zero
                if (len != 0) {
                    len = lps[len - 1];
 
                    // Also, note that i is
                    // not incremented here
                }
 
                // Otherwise
                else {
                    lps[i] = len;
                    i++;
                }
            }
        }
    }
 
    // Function to find the frequency of
    // pat in the string txt
    static int KMPSearch(String pat, String txt)
    {
        // Stores length of both strings
        int M = pat.length();
        int N = txt.length();
 
        // Initialize lps[] to store the
        // longest prefix suffix values
        // for the string pattern
        int lps[] = new int[M];
 
        // Store the index for pat[]
        int j = 0;
 
        // Preprocess the pattern
        // (calculate lps[] array)
        computeLPSArray(pat, M, lps);
 
        // Store the index for txt[]
        int i = 0;
        int res = 0;
        int next_i = 0;
 
        while (i < N) {
            if (pat.charAt(j) == txt.charAt(i)) {
                j++;
                i++;
            }
            if (j == M) {
 
                // If pattern is found the
                // first time, iterate again
                // to check for more patterns
                j = lps[j - 1];
                res++;
 
                // Start i to check for more
                // than once occurrence
                // of pattern, reset i to
                // previous start + 1
                if (lps[j] != 0)
                    i = ++next_i;
                j = 0;
            }
 
            // Mismatch after j matches
            else if (i < N
                     && pat.charAt(j) != txt.charAt(i)) {
 
                // Do not match lps[0..lps[j-1]]
                // characters, they will
                // match anyway
                if (j != 0)
                    j = lps[j - 1];
                else
                    i = i + 1;
            }
        }
 
        // Return the required frequency
        return res;
    }
 
    // Function to find count of substrings
    // of length M occurring exactly P times
    // in the string, S
    static void findCount(String S, int M, int P)
    {
 
        // Store all substrings of length M
        // set<string> vec;
        TreeSet<String> vec = new TreeSet<>();
 
        // Store the size of the string, S
        int n = S.length();
 
        // Pick starting point
        for (int i = 0; i < n; i++) {
 
            // Pick ending point
            for (int len = 1; len <= n - i; len++) {
 
                // If the substring is of
                // length M, insert it in vec
                String s = S.substring(i, i + len);
                if (s.length() == M) {
                    vec.add(s);
                }
            }
        }
 
        // Initialise count as 0 to store
        // the required count of substrings
        int count = 0;
 
        // Iterate through the set of
        // substrings
        for (String it : vec) {
 
            // Store its frequency
            int ans = KMPSearch(it, S);
 
            // If frequency is equal to P
            if (ans == P) {
 
                // Increment count by 1
                count++;
            }
        }
 
        // Print the answer
        System.out.println(count);
    }
 
    // Driver Code
    public static void main(String[] args)
    {
 
        String S = "abacaba";
        int M = 3, P = 2;
 
        // Function Call
        findCount(S, M, P);
    }
}
 
// This code is contributed by kingash.

Python3

# Python 3 program for the above approach
 
# Function to compute the LPS array
def computeLPSArray(pat, M, lps):
   
    # Length of the previous
    # longest prefix suffix
    len1 = 0
    i = 1
    lps[0] = 0
 
    # Iterate from [1, M - 1] to find lps[i]
    while (i < M):
       
        # If the characters match
        if (pat[i] == pat[len1]):
            len1 += 1
            lps[i] = len1
            i += 1
 
        # If pat[i] != pat[len]
        else:
            # If length is non-zero
            if (len1 != 0):
                len1 = lps[len1 - 1]
 
                # Also, note that i is
                # not incremented here
 
            # Otherwise
            else:
                lps[i] = len1
                i += 1
 
# Function to find the frequency of
# pat in the string txt
def KMPSearch(pat, txt):
   
    # Stores length of both strings
    M = len(pat)
    N = len(txt)
 
    # Initialize lps[] to store the
    # longest prefix suffix values
    # for the string pattern
    lps = [0 for i in range(M)]
 
    # Store the index for pat[]
    j = 0
 
    # Preprocess the pattern
    # (calculate lps[] array)
    computeLPSArray(pat, M, lps)
 
    # Store the index for txt[]
    i = 0
    res = 0
    next_i = 0
 
    while (i < N):
        if (pat[j] == txt[i]):
            j += 1
            i += 1
        if (j == M):
           
            # If pattern is found the
            # first time, iterate again
            # to check for more patterns
            j = lps[j - 1]
            res += 1
 
            # Start i to check for more
            # than once occurrence
            # of pattern, reset i to
            # previous start + 1
            if (lps[j] != 0):
                next_i += 1
                i = next_i
            j = 0
 
        # Mismatch after j matches
        elif (i < N and pat[j] != txt[i]):
            # Do not match lps[0..lps[j-1]]
            # characters, they will
            # match anyway
            if (j != 0):
                j = lps[j - 1]
            else:
                i = i + 1
 
    # Return the required frequency
    return res
 
# Function to find count of substrings
# of length M occurring exactly P times
# in the string, S
def findCount(S, M, P):
   
    # Store all substrings of length M
    vec = set()
 
    # Store the size of the string, S
    n = len(S)
 
    # Pick starting point
    for i in range(n):
       
        # Pick ending point
        for len1 in range(n - i + 1):
           
            # If the substring is of
            # length M, insert it in vec
            s = S[i:len1]
             
          #  if (len1(s) == M):
           #     vec.add(s)
 
    # Initialise count as 0 to store
    # the required count of substrings
    count = 1
 
    # Iterate through the set of
    # substrings
    for it in vec:
       
        # Store its frequency
        ans = KMPSearch(it, S)
 
        # If frequency is equal to P
        if (ans == P):
           
            # Increment count by 1
            count += 1
 
    # Print the answer
    print(count)
 
# Driver Code
if __name__ == '__main__':
    S = "abacaba"
    M = 3
    P = 2
 
    # Function Call
    findCount(S, M, P)
     
    # This code is contributed by ipg2016107.

C#

// C# program for the above approach
using System;
using System.Collections.Generic;
class GFG
{
 
  // Function to compute the LPS array
  static void computeLPSArray(string pat, int M, int[] lps)
  {
 
    // Length of the previous
    // longest prefix suffix
    int len = 0;
    int i = 1;
    lps[0] = 0;
 
    // Iterate from [1, M - 1] to find lps[i]
    while (i < M)
    {
 
      // If the characters match
      if (pat[i] == pat[len])
      {
        len++;
        lps[i] = len;
        i++;
      }
 
      // If pat[i] != pat[len]
      else {
 
        // If length is non-zero
        if (len != 0) {
          len = lps[len - 1];
 
          // Also, note that i is
          // not incremented here
        }
 
        // Otherwise
        else {
          lps[i] = len;
          i++;
        }
      }
    }
  }
 
  // Function to find the frequency of
  // pat in the string txt
  static int KMPSearch(string pat, string txt)
  {
 
    // Stores length of both strings
    int M = pat.Length;
    int N = txt.Length;
 
    // Initialize lps[] to store the
    // longest prefix suffix values
    // for the string pattern
    int[] lps = new int[M];
 
    // Store the index for pat[]
    int j = 0;
 
    // Preprocess the pattern
    // (calculate lps[] array)
    computeLPSArray(pat, M, lps);
 
    // Store the index for txt[]
    int i = 0;
    int res = 0;
    int next_i = 0;
 
    while (i < N) {
      if (pat[j] == txt[i]) {
        j++;
        i++;
      }
      if (j == M) {
 
        // If pattern is found the
        // first time, iterate again
        // to check for more patterns
        j = lps[j - 1];
        res++;
 
        // Start i to check for more
        // than once occurrence
        // of pattern, reset i to
        // previous start + 1
        if (lps[j] != 0)
          i = ++next_i;
        j = 0;
      }
 
      // Mismatch after j matches
      else if (i < N
               && pat[j] != txt[i]) {
 
        // Do not match lps[0..lps[j-1]]
        // characters, they will
        // match anyway
        if (j != 0)
          j = lps[j - 1];
        else
          i = i + 1;
      }
    }
 
    // Return the required frequency
    return res;
  }
 
  // Function to find count of substrings
  // of length M occurring exactly P times
  // in the string, S
  static void findCount(string S, int M, int P)
  {
 
    // Store all substrings of length M
    HashSet<string> vec = new HashSet<string>();
 
    // Store the size of the string, S
    int n = S.Length;
 
    // Pick starting point
    for (int i = 0; i < n; i++) {
 
      // Pick ending point
      for (int len = 1;
           len <= n - i; len++) {
 
        // If the substring is of
        // length M, insert it in vec
        string s = S.Substring(i, len);
        if (s.Length == M) {
          vec.Add(s);
        }
      }
    }
 
    // Initialise count as 0 to store
    // the required count of substrings
    int count = 0;
 
    // Iterate through the set of
    // substrings
    foreach(string it in vec) {
 
      // Store its frequency
      int ans = KMPSearch(it, S);
 
      // If frequency is equal to P
      if (ans == P) {
 
        // Increment count by 1
        count++;
      }
    }
 
    // Print the answer
    Console.WriteLine(count);
  }
 
  // Driver code
  static void Main() {
    string S = "abacaba";
    int M = 3, P = 2;
 
    // Function Call
    findCount(S, M, P);
  }
}
 
// This code is contributed by divyeshrabadiya07.

Javascript

<script>
 
//Javascript implementation of the approach
 
// Function to compute the LPS array
function computeLPSArray(pat, M, lps)
{
    // Length of the previous
    // longest prefix suffix
    var len = 0;
    var i = 1;
    lps[0] = 0;
 
    // Iterate from [1, M - 1] to find lps[i]
    while (i < M) {
 
        // If the characters match
        if (pat[i] == pat[len]) {
 
            len++;
            lps[i] = len;
            i++;
        }
 
        // If pat[i] != pat[len]
        else {
 
            // If length is non-zero
            if (len != 0) {
                len = lps[len - 1];
 
                // Also, note that i is
                // not incremented here
            }
 
            // Otherwise
            else {
                lps[i] = len;
                i++;
            }
        }
    }
}
 
// Function to find the frequency of
// pat in the string txt
function KMPSearch(pat, txt)
{
    // Stores length of both strings
    var M = pat.length;
    var N = txt.length;
 
    // Initialize lps[] to store the
    // longest prefix suffix values
    // for the string pattern
    var lps = new Array(M);
 
    // Store the index for pat[]
    var j = 0;
 
    // Preprocess the pattern
    // (calculate lps[] array)
    computeLPSArray(pat, M, lps);
 
    // Store the index for txt[]
    var i = 0;
    var res = 0;
    var next_i = 0;
 
    while (i < N) {
        if (pat[j] == txt[i]) {
            j++;
            i++;
        }
        if (j == M) {
 
            // If pattern is found the
            // first time, iterate again
            // to check for more patterns
            j = lps[j - 1];
            res++;
 
            // Start i to check for more
            // than once occurrence
            // of pattern, reset i to
            // previous start + 1
            if (lps[j] != 0)
                i = ++next_i;
            j = 0;
        }
 
        // Mismatch after j matches
        else if (i < N
                 && pat[j] != txt[i]) {
 
            // Do not match lps[0..lps[j-1]]
            // characters, they will
            // match anyway
            if (j != 0)
                j = lps[j - 1];
            else
                i = i + 1;
        }
    }
 
    // Return the required frequency
    return res;
}
 
// Function to find count of substrings
// of length M occurring exactly P times
// in the string, S
function findCount( S, M, P)
{
 
    // Store all substrings of length M
    var vec = new Set();
 
    // Store the size of the string, S
    var n = S.length;
 
    // Pick starting point
    for (var i = 0; i < n; i++) {
 
        // Pick ending point
        for (var len = 1;
             len <= n - i; len++) {
 
            // If the substring is of
            // length M, insert it in vec
            var s = S.substring(i, len);
            if (s.length == M) {
                vec.add(s);
            }
        }
    }
 
    // Initialise count as 0 to store
    // the required count of substrings
    var count = 0;
 
    // Iterate through the set of
    // substrings
    for (const it of vec){
 
        // Store its frequency
        var ans = KMPSearch(it, S);
 
        // If frequency is equal to P
        if (ans == P) {
 
            // Increment count by 1
            count++;
        }
    }
 
    // Print the answer
    document.write( count);
}
 
 
var S = "abacaba";
var M = 3, P = 2;
// Function Call
findCount(S, M, P);
 
// This code is contributed by SoumikMondal
 
</script>

Producción:

Complejidad de Tiempo: O((N*M) + (N ² – M ² ))
Espacio Auxiliar: O(N – M)

Publicación traducida automáticamente

Artículo escrito por koulick_sadhu y traducido por Barcelona Geeks. The original can be accessed here. Licence: CCBY-SA

C++

Java

Python3

C#

Javascript

Deja una respuesta Cancelar la respuesta