Imprima el carácter Kth en substrings concatenadas ordenadas de una string

Dada una string de caracteres alfabéticos inferiores, encuentre el carácter K-ésimo en una string formada por substrings (de una string dada) cuando se concatenan en forma ordenada.

Ejemplos:

Input : str = “banana”
          K = 10
Output : n
All substring in sorted form are,
"a", "an", "ana", "anan", "anana", 
"b", "ba", "ban", "bana", "banan", 
"banana", "n", "na", "nan", "nana"
Concatenated string = “aananaanana
nanabbabanbanabananbananannanannana”
We can see a 10th character in the 
above concatenated string is ‘n’ 
which is our final answer.

Una solución simple es generar todas las substrings de una string determinada y almacenarlas en una array. Una vez que se generan las substrings, ordénelas y concatene después de ordenarlas. Finalmente imprima el carácter K-ésimo en la string concatenada.

Una solución eficiente se basa en contar distintas substrings de una string usando una array de sufijos . El mismo método se utiliza para resolver este problema también. Después de obtener la array de sufijos y la array de lcp, recorremos todos los valores de lcp y, para cada uno de esos valores, calculamos los caracteres que se omitirán. Seguimos restando estos muchos caracteres de nuestra K, cuando el carácter a saltar se convierte en más que K, nos detenemos y recorremos las substrings correspondientes al lcp[i] actual, en el que recorremos desde lcp[i] hasta la longitud máxima de la string y luego imprime el carácter Kth.

Implementación:

C++

// C++ program to print Kth character
// in sorted concatenated substrings
#include <bits/stdc++.h>
using namespace std;
 
// Structure to store information of a suffix
struct suffix
{
    int index;  // To store original index
    int rank[2]; // To store ranks and next
                 // rank pair
};
 
// A comparison function used by sort() to compare
// two suffixes. Compares two pairs, returns 1 if
// first pair is smaller
int cmp(struct suffix a, struct suffix b)
{
    return (a.rank[0] == b.rank[0])?
           (a.rank[1] < b.rank[1] ?1: 0):
           (a.rank[0] < b.rank[0] ?1: 0);
}
 
// This is the main function that takes a string
// 'txt' of size n as an argument, builds and return
// the suffix array for the given string
vector<int> buildSuffixArray(string txt, int n)
{
    // A structure to store suffixes and their indexes
    struct suffix suffixes[n];
 
    // Store suffixes and their indexes in an array
    // of structures. The structure is needed to sort
    // the suffixes alphabetically and maintain their
    // old indexes while sorting
    for (int i = 0; i < n; i++)
    {
        suffixes[i].index = i;
        suffixes[i].rank[0] = txt[i] - 'a';
        suffixes[i].rank[1] = ((i+1) < n)?
                              (txt[i + 1] - 'a'): -1;
    }
 
    // Sort the suffixes using the comparison function
    // defined above.
    sort(suffixes, suffixes+n, cmp);
 
    // At his point, all suffixes are sorted according
    // to first 2 characters.  Let us sort suffixes
    // according to first 4 characters, then first
    // 8 and so on
    int ind[n];  // This array is needed to get the
                 // index in suffixes[] from original
                 // index. This mapping is needed to get
                 // next suffix.
 
    for (int k = 4; k < 2*n; k = k*2)
    {
        // Assigning rank and index values to first suffix
        int rank = 0;
        int prev_rank = suffixes[0].rank[0];
        suffixes[0].rank[0] = rank;
        ind[suffixes[0].index] = 0;
 
        // Assigning rank to suffixes
        for (int i = 1; i < n; i++)
        {
            // If first rank and next ranks are same as
            // that of previous suffix in array, assign
            // the same new rank to this suffix
            if (suffixes[i].rank[0] == prev_rank &&
               suffixes[i].rank[1] == suffixes[i-1].rank[1])
            {
                prev_rank = suffixes[i].rank[0];
                suffixes[i].rank[0] = rank;
            }
 
            else // Otherwise increment rank and assign
            {
                prev_rank = suffixes[i].rank[0];
                suffixes[i].rank[0] = ++rank;
            }
            ind[suffixes[i].index] = i;
        }
 
        // Assign next rank to every suffix
        for (int i = 0; i < n; i++)
        {
            int nextindex = suffixes[i].index + k/2;
            suffixes[i].rank[1] = (nextindex < n)?
                      suffixes[ind[nextindex]].rank[0]: -1;
        }
 
        // Sort the suffixes according to first k characters
        sort(suffixes, suffixes+n, cmp);
    }
 
    // Store indexes of all sorted suffixes in the suffix
    // array
    vector<int>suffixArr;
    for (int i = 0; i < n; i++)
        suffixArr.push_back(suffixes[i].index);
 
    // Return the suffix array
    return  suffixArr;
}
 
/* To construct and return LCP */
vector<int> kasai(string txt, vector<int> suffixArr)
{
    int n = suffixArr.size();
 
    // To store LCP array
    vector<int> lcp(n, 0);
 
    // An auxiliary array to store inverse of suffix array
    // elements. For example if suffixArr[0] is 5, the
    // invSuff[5] would store 0.  This is used to get next
    // suffix string from suffix array.
    vector<int> invSuff(n, 0);
 
    // Fill values in invSuff[]
    for (int i=0; i < n; i++)
        invSuff[suffixArr[i]] = i;
 
    // Initialize length of previous LCP
    int k = 0;
 
    // Process all suffixes one by one starting from
    // first suffix in txt[]
    for (int i=0; i<n; i++)
    {
        /* If the current suffix is at n-1, then we don’t
           have next substring to consider. So lcp is not
           defined for this substring, we put zero. */
        if (invSuff[i] == n-1)
        {
            k = 0;
            continue;
        }
 
        /* j contains index of the next substring to
           be considered  to compare with the present
           substring, i.e., next string in suffix array */
        int j = suffixArr[invSuff[i]+1];
 
        // Directly start matching from k'th index as
        // at-least k-1 characters will match
        while (i+k<n && j+k<n && txt[i+k]==txt[j+k])
            k++;
 
        lcp[invSuff[i]] = k; // lcp for the present suffix.
 
        // Deleting the starting character from the string.
        if (k>0)
            k--;
    }
 
    // return the constructed lcp array
    return lcp;
}
 
//    Utility method to get sum of first N numbers
int sumOfFirstN(int N)
{
    return (N * (N + 1)) / 2;
}
 
// Returns Kth character in sorted concatenated
// substrings of str
char printKthCharInConcatSubstring(string str, int K)
{
    int n = str.length();
    //  calculating suffix array and lcp array
    vector<int> suffixArr = buildSuffixArray(str, n);
    vector<int> lcp = kasai(str, suffixArr);
 
    for (int i = 0; i < lcp.size(); i++)
     {
         //    skipping characters common to substring
         //    (n - suffixArr[i]) is length of current
         //  maximum substring lcp[i] will length of
         // common substring
        int charToSkip = sumOfFirstN(n - suffixArr[i]) -
                         sumOfFirstN(lcp[i]);
 
        /*    if characters are more than K, that means
            Kth character belongs to substring
            corresponding to current lcp[i]*/
        if (K <= charToSkip)
        {
            // loop from current lcp value to current
            // string length
            for (int j = lcp[i] + 1; j <= (n-suffixArr[i]); j++)
            {
                int curSubstringLen = j;
 
                /* Again reduce K by current substring's
                   length one by one and when it becomes less,
                    print Kth character of current substring */
                if (K <= curSubstringLen)
                    return str[(suffixArr[i] + K - 1)];
                else
                    K -= curSubstringLen;
 
            }
            break;
        }
        else
            K -= charToSkip;
     }
}
 
//    Driver code to test above methods
int main()
{
    string str = "banana";
    int K = 10;
    cout << printKthCharInConcatSubstring(str, K);
    return 0;
}

Python3

# Python3 program to print Kth character
# in sorted concatenated substrings
 
# Structure to store information of a suffix
class suffix:
     
    def __init__(self):
         
        self.index = 0
         
        # To store original index
        self.rank = [0] * 2
         
        # To store ranks and next
        # rank pair
 
# This is the main function that takes a string
# 'txt' of size n as an argument, builds and return
# the suffix array for the given string
def buildSuffixArray(txt: str, n: int) -> list:
 
    # A structure to store suffixes
    # and their indexes
    suffixes = [0] * n
    for i in range(n):
        suffixes[i] = suffix()
 
    # Store suffixes and their indexes in an array
    # of structures. The structure is needed to sort
    # the suffixes alphabetically and maintain their
    # old indexes while sorting
    for i in range(n):
        suffixes[i].index = i
        suffixes[i].rank[0] = ord(txt[i]) - ord('a')
        suffixes[i].rank[1] = (ord(txt[i + 1]) -
                        ord('a')) if ((i + 1) < n) else -1
 
    # Sort the suffixes using the comparison function
    # defined above.
    suffixes.sort(key = lambda a: a.rank)
 
    # At his point, all suffixes are sorted according
    # to first 2 characters.  Let us sort suffixes
    # according to first 4 characters, then first
    # 8 and so on
    ind = [0] * n
     
    # This array is needed to get the
    # index in suffixes[] from original
    # index. This mapping is needed to get
    # next suffix.
    k = 4
    while k < 2 * n:
        k *= 2
         
        # for k in range(4, 2 * n, k * 2):
 
        # Assigning rank and index values
        # to first suffix
        rank = 0
        prev_rank = suffixes[0].rank[0]
        suffixes[0].rank[0] = rank
        ind[suffixes[0].index] = 0
 
        # Assigning rank to suffixes
        for i in range(1, n):
 
            # If first rank and next ranks are same as
            # that of previous suffix in array, assign
            # the same new rank to this suffix
            if (suffixes[i].rank[0] == prev_rank and
                suffixes[i].rank[1] == suffixes[i - 1].rank[1]):
                prev_rank = suffixes[i].rank[0]
                suffixes[i].rank[0] = rank
                 
            # Otherwise increment rank and assign
            else: 
                prev_rank = suffixes[i].rank[0]
                rank += 1
                suffixes[i].rank[0] = rank
 
            ind[suffixes[i].index] = i
 
        # Assign next rank to every suffix
        for i in range(n):
            nextindex = suffixes[i].index + k // 2
            suffixes[i].rank[1] = suffixes[ind[nextindex]].rank[0] if (
                nextindex < n) else -1
 
        # Sort the suffixes according to first k characters
        suffixes.sort(key = lambda a : a.rank)
 
    # Store indexes of all sorted suffixes
    # in the suffix array
    suffixArr = []
    for i in range(n):
        suffixArr.append(suffixes[i].index)
 
    # Return the suffix array
    return suffixArr
 
# To construct and return LCP */
def kasai(txt: str, suffixArr: list) -> list:
 
    n = len(suffixArr)
 
    # To store LCP array
    lcp = [0] * n
 
    # An auxiliary array to store inverse of
    # suffix array elements. For example if
    # suffixArr[0] is 5, the invSuff[5] would
    # store 0.  This is used to get next
    # suffix string from suffix array.
    invSuff = [0] * n
 
    # Fill values in invSuff[]
    for i in range(n):
        invSuff[suffixArr[i]] = i
 
    # Initialize length of previous LCP
    k = 0
 
    # Process all suffixes one by one
    # starting from first suffix in txt[]
    for i in range(n):
 
        # If the current suffix is at n-1, then
        # we don’t have next substring to
        # consider. So lcp is not defined for
        # this substring, we put zero.
        if (invSuff[i] == n - 1):
            k = 0
            continue
 
        # j contains index of the next substring to
        # be considered  to compare with the present
        # substring, i.e., next string in suffix array
        j = suffixArr[invSuff[i] + 1]
 
        # Directly start matching from k'th index as
        # at-least k-1 characters will match
        while (i + k < n and j + k < n and
           txt[i + k] == txt[j + k]):
            k += 1
 
        lcp[invSuff[i]] = k
        # lcp for the present suffix.
 
        # Deleting the starting character
        # from the string.
        if (k > 0):
            k -= 1
 
    # Return the constructed lcp array
    return lcp
 
# Utility method to get sum of first N numbers
def sumOfFirstN(N: int) -> int:
 
    return (N * (N + 1)) // 2
 
# Returns Kth character in sorted concatenated
# substrings of str
def printKthCharInConcatSubstring(string: str,
                               K: int) -> str:
 
    n = len(string)
     
    # Calculating suffix array and lcp array
    suffixArr = buildSuffixArray(string, n)
    lcp = kasai(string, suffixArr)
 
    for i in range(len(lcp)):
 
        # Skipping characters common to substring
        # (n - suffixArr[i]) is length of current
        # maximum substring lcp[i] will length of
        # common substring
        charToSkip = (sumOfFirstN(n - suffixArr[i]) -
                                sumOfFirstN(lcp[i]))
 
        # If characters are more than K, that means
        # Kth character belongs to substring
        # corresponding to current lcp[i]
        if (K <= charToSkip):
 
            # Loop from current lcp value to current
            # string length
            for j in range(lcp[i] + 1,
               (n - suffixArr[i]) + 1):
                curSubstringLen = j
 
                # Again reduce K by current substring's
                # length one by one and when it becomes less,
                # print Kth character of current substring
                if (K <= curSubstringLen):
                    return string[(suffixArr[i] + K - 1)]
                else:
                    K -= curSubstringLen
                     
            break
 
        else:
            K -= charToSkip
 
# Driver code
if __name__ == "__main__":
 
    string = "banana"
    K = 10
     
    print(printKthCharInConcatSubstring(string, K))
 
# This code is contributed by sanjeev2552

Producción

Este artículo es una contribución de Utkarsh Trivedi . Si te gusta GeeksforGeeks y te gustaría contribuir, también puedes escribir un artículo usando write.geeksforgeeks.org o enviar tu artículo por correo a review-team@geeksforgeeks.org. Vea su artículo que aparece en la página principal de GeeksforGeeks y ayude a otros Geeks

Publicación traducida automáticamente

Artículo escrito por GeeksforGeeks-1 y traducido por Barcelona Geeks. The original can be accessed here. Licence: CCBY-SA

C++

Python3

Deja una respuesta Cancelar la respuesta