Descargar publicaciones de Instagram usando el módulo Python Selenium

En este artículo aprenderemos cómo descargar las publicaciones de un perfil de Instagram utilizando el módulo Selenium de Python.


Requisitos:

  • Google Chrome o Firefox
  • Controlador Chrome (para Google Chrome) o controlador Gecko (para Mozilla Firefox)
  • Paquete Selenium: es una potente herramienta para controlar un navegador web desde un programa. Funciona con todos los navegadores y en todos los principales sistemas operativos, y sus scripts pueden escribirse en varios lenguajes, por ejemplo Python, Java, C#, etc. Se puede instalar con el siguiente comando:
pip install selenium 
  • Paquete Beautiful Soup: es una biblioteca de Python para extraer datos de archivos HTML y XML. Funciona con su analizador favorito para proporcionar formas idiomáticas de navegar, buscar y modificar el árbol de análisis. Se puede instalar con el siguiente comando:
pip install bs4
  • Paquete requests: la biblioteca requests se utiliza en Python para realizar solicitudes HTTP a una URL específica. Se puede instalar usando el siguiente comando:
pip install requests

Enfoque paso a paso:

Paso 1: importar módulos e ingresar la información de inicio de sesión junto con la URL de la página. 


# import required modules
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import selenium.common.exceptions
import time
from bs4 import BeautifulSoup as bs
import requests
import os
# get instagram account credentials
username = input('Enter Your User Name ')
password = input('Enter Your Password ')
# assign URL: the profile page of the account whose posts will be downloaded.
# The base address is required so that the browser can navigate to it and so
# that url.split('/')[-1] yields the profile name.
url = 'https://www.instagram.com/' + \
    input('Enter User Name Of User For Downloading Posts ')

Paso 2: Función para iniciar una nueva sesión del navegador. Según su instalación, es posible que deba pasar la ruta del controlador web a la función Chrome().


# get URL path
def path():
    """Start a new Chrome session and store it in the module-level ``chrome``.

    The other functions in this script read the ``chrome`` global, so this
    must be called before any of them. Pass the driver path to
    ``webdriver.Chrome()`` here if your installation requires it.
    """
    global chrome
    chrome = webdriver.Chrome()

Paso 3: Función para ingresar la URL de la página. 


# extract URL
def url_name(url):
    """Open ``url`` in the browser session and give the page time to load.

    Args:
        url: Address of the page to open.

    Requires the module-level ``chrome`` session created by ``path()``.
    """
    # the web page opens up
    chrome.get(url)
    # pause for 4 sec so that the page's elements are present before the
    # following steps look them up; otherwise the lookups would raise
    # NoSuchElementException and the elements would be skipped.
    time.sleep(4)

Paso 4: Función para ingresar su información de inicio de sesión. 


# login to access post
def login(username, your_password):
    """Log in to the site through the page currently open in ``chrome``.

    Args:
        username: Account name typed into the ``username`` field.
        your_password: Password typed into the ``password`` field.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If one of the
            expected buttons or fields is not present on the page.
    """
    # open the login form
    log_but = chrome.find_element_by_class_name("L3NKy")
    log_but.click()
    # give the form time to render before looking up its fields
    time.sleep(4)
    # finds the username box
    usern = chrome.find_element_by_name("username")
    # sends the entered username
    usern.send_keys(username)
    # finds the password box
    passw = chrome.find_element_by_name("password")
    # sends the entered password
    passw.send_keys(your_password)
    # sends the enter key
    passw.send_keys(Keys.RETURN)
    # wait for the post-login page before looking for the dialog
    time.sleep(5)
    # Find Not Now Button and dismiss the dialog
    notn = chrome.find_element_by_class_name("yWX7d")
    notn.click()
    time.sleep(3)

Paso 5: Función para abrir la primera publicación.


# function to get first post
def first_post():
    """Click the first element with class ``kIKUG`` on the current page.

    Per the surrounding article this is the thumbnail of the first post, so
    the click opens that post.
    """
    thumbnail = chrome.find_element_by_class_name("kIKUG")
    thumbnail.click()

Paso 6: Función para descargar todas las publicaciones.


def download_allposts():
    """Download the media of every post of the profile stored in ``url``.

    Files are written into a folder named after the profile (the last path
    segment of ``url``). Post number ``c`` is saved as ``content<c>``; a post
    with several media items is saved as ``content<c>.<index>``.

    Requires a logged-in ``chrome`` session showing the profile page.
    """
    # open First Post
    first_post()
    # tolerate a trailing slash so the folder name is never empty
    user_name = url.rstrip('/').split('/')[-1]
    # Create the folder corresponding to the user name if it does not exist
    os.makedirs(user_name, exist_ok=True)
    # Check if Posts contains multiple images or videos
    multiple_images = nested_check()
    if multiple_images:
        nescheck = multiple_images
        count_img = 0
        while nescheck:
            elem_img = chrome.find_element_by_class_name('rQDP3')
            # Function to save nested images
            save_multiple(user_name+'/'+'content1.'+str(count_img), elem_img)
            count_img += 1
            # advance the carousel; without this the loop never terminates
            nescheck.click()
            nescheck = nested_check()
        # pass last_img_flag True
        save_multiple(user_name+'/'+'content1.' +
                      str(count_img), elem_img, last_img_flag=1)
    else:
        save_content('_97aPb', user_name+'/'+'content1')
    c = 2
    while True:
        next_el = next_post()
        if not next_el:
            # no "next post" arrow: the last post has been processed
            break
        next_el.click()
        # give the next post time to load
        time.sleep(1.5)
        try:
            multiple_images = nested_check()
            if multiple_images:
                nescheck = multiple_images
                count_img = 0
                while nescheck:
                    elem_img = chrome.find_element_by_class_name('rQDP3')
                    save_multiple(user_name+'/'+'content' +
                                  str(c)+'.'+str(count_img), elem_img)
                    count_img += 1
                    nescheck.click()
                    nescheck = nested_check()
                save_multiple(user_name+'/'+'content'+str(c) +
                              '.'+str(count_img), elem_img, 1)
            else:
                save_content('_97aPb', user_name+'/'+'content'+str(c))
        except selenium.common.exceptions.NoSuchElementException:
            # the media container is missing: nothing more can be saved
            break
        c += 1

Paso 7: Función para hacer clic en la siguiente publicación.


# function to get next post
def next_post():
    """Return the "next post" arrow element, or 0 if the page has none.

    Returns:
        The element with class ``coreSpriteRightPaginationArrow`` when it is
        present, otherwise the falsy value ``0``.
    """
    try:
        nex = chrome.find_element_by_class_name("coreSpriteRightPaginationArrow")
        return nex
    except selenium.common.exceptions.NoSuchElementException:
        return 0

Paso 8: Función para guardar publicaciones normales.


# Function to save content of the current post
def save_content(class_name, img_name):
    """Download the video or image of the current post into ``img_name``.

    Args:
        class_name: Class of the element whose inner HTML contains the media.
        img_name: Path of the file to write.

    If the element is not found, a message is printed and nothing is saved.
    """
    try:
        pic = chrome.find_element_by_class_name(class_name)
    except selenium.common.exceptions.NoSuchElementException:
        print("Either This user has no images or you haven't followed this user or something went wrong")
        # without the element there is nothing to download
        return
    html = pic.get_attribute('innerHTML')
    soup = bs(html, 'html.parser')
    # prefer the video source; fall back to the image source
    link = soup.find('video')
    if link:
        link = link['src']
    else:
        link = soup.find('img')['src']
    response = requests.get(link)
    with open(img_name, 'wb') as f:
        f.write(response.content)

Paso 9: función para guardar publicaciones anidadas.


# Function to save multiple posts
def save_multiple(img_name, elem, last_img_flag=False):
    """Download one media item of a multi-item post into ``img_name``.

    Args:
        img_name: Path of the file to write.
        elem: Element whose inner HTML contains the ``<ul>`` of media items.
        last_img_flag: When truthy, save the last ``<li>`` of the list;
            otherwise save the middle one.
    """
    markup = elem.get_attribute('innerHTML')
    html = bs(markup, 'html.parser')
    # the first <ul> holds one <li> per media item
    biglist = html.find_all('ul')
    biglist = biglist[0]
    list_images = biglist.find_all('li')
    if last_img_flag:
        user_image = list_images[-1]
    else:
        user_image = list_images[(len(list_images)//2)]
    # prefer the video source; fall back to the image source
    video = user_image.find('video')
    if video:
        link = video['src']
    else:
        link = user_image.find('img')['src']
    response = requests.get(link)
    with open(img_name, 'wb') as f:
        f.write(response.content)

Paso 10: función para verificar si la publicación está anidada o no.


# function to check if the post is nested
def nested_check():
    """Return the "next item" chevron of the current post, or 0 if absent.

    Returns:
        The element with class ``coreSpriteRightChevron`` when the post has
        a further media item to show, otherwise the falsy value ``0``.
    """
    try:
        nes_nex = chrome.find_element_by_class_name('coreSpriteRightChevron  ')
        return nes_nex
    except selenium.common.exceptions.NoSuchElementException:
        return 0

Paso 11: Llamar a las funciones requeridas en el código del controlador.


# Driver Code
# A browser session must exist and the profile page must be loaded before
# login() can look up the page's elements.
path()
try:
    url_name(url)
    login(username, password)
    download_allposts()
finally:
    # always release the browser, even if a step above fails
    chrome.quit()

A continuación se muestra el programa completo basado en el enfoque anterior:


# import required modules
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import selenium.common.exceptions
import time
from bs4 import BeautifulSoup as bs
import requests
import os
# get instagram account credentials
username = input('Enter Your User Name ')
password = input('Enter Your Password ')
# assign URL: the profile page of the account whose posts will be downloaded.
# The base address is required so that the browser can navigate to it and so
# that url.split('/')[-1] yields the profile name.
url = 'https://www.instagram.com/' + \
    input('Enter User Name Of User For Downloading Posts ')
# Get URL path
def path():
    """Start a new Chrome session and store it in the module-level ``chrome``.

    The other functions in this script read the ``chrome`` global, so this
    must be called before any of them. Pass the driver path to
    ``webdriver.Chrome()`` here if your installation requires it.
    """
    global chrome
    chrome = webdriver.Chrome()
# Extract URL
def url_name(url):
    """Open ``url`` in the browser session and give the page time to load.

    Args:
        url: Address of the page to open.

    Requires the module-level ``chrome`` session created by ``path()``.
    """
    # the web page opens up
    chrome.get(url)
    # pause for 4 sec so that the page's elements are present before the
    # following steps look them up; otherwise the lookups would raise
    # NoSuchElementException and the elements would be skipped.
    time.sleep(4)
# Login to access post
def login(username, your_password):
    """Log in to the site through the page currently open in ``chrome``.

    Args:
        username: Account name typed into the ``username`` field.
        your_password: Password typed into the ``password`` field.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If one of the
            expected buttons or fields is not present on the page.
    """
    # open the login form
    log_but = chrome.find_element_by_class_name("L3NKy")
    log_but.click()
    # give the form time to render before looking up its fields
    time.sleep(4)
    # finds the username box
    usern = chrome.find_element_by_name("username")
    # sends the entered username
    usern.send_keys(username)
    # finds the password box
    passw = chrome.find_element_by_name("password")
    # sends the entered password
    passw.send_keys(your_password)
    # sends the enter key
    passw.send_keys(Keys.RETURN)
    # wait for the post-login page before looking for the dialog
    time.sleep(5)
    # Find Not Now Button and dismiss the dialog
    notn = chrome.find_element_by_class_name("yWX7d")
    notn.click()
    time.sleep(3)
# Function to get content of first post
def first_post():
    """Click the first element with class ``kIKUG`` on the current page.

    Per the surrounding article this is the thumbnail of the first post, so
    the click opens that post.
    """
    thumbnail = chrome.find_element_by_class_name("kIKUG")
    thumbnail.click()
# Function to get next post
def next_post():
    """Return the "next post" arrow element, or 0 if the page has none.

    Returns:
        The element with class ``coreSpriteRightPaginationArrow`` when it is
        present, otherwise the falsy value ``0``.
    """
    try:
        nex = chrome.find_element_by_class_name(
            "coreSpriteRightPaginationArrow")
        return nex
    except selenium.common.exceptions.NoSuchElementException:
        return 0
# Download content of all posts
def download_allposts():
    """Download the media of every post of the profile stored in ``url``.

    Files are written into a folder named after the profile (the last path
    segment of ``url``). Post number ``c`` is saved as ``content<c>``; a post
    with several media items is saved as ``content<c>.<index>``.

    Requires a logged-in ``chrome`` session showing the profile page.
    """
    # open First Post
    first_post()
    # tolerate a trailing slash so the folder name is never empty
    user_name = url.rstrip('/').split('/')[-1]
    # Create the folder corresponding to the user name if it does not exist
    os.makedirs(user_name, exist_ok=True)
    # Check if Posts contains multiple images or videos
    multiple_images = nested_check()
    if multiple_images:
        nescheck = multiple_images
        count_img = 0
        while nescheck:
            elem_img = chrome.find_element_by_class_name('rQDP3')
            # Function to save nested images
            save_multiple(user_name+'/'+'content1.'+str(count_img), elem_img)
            count_img += 1
            # advance the carousel; without this the loop never terminates
            nescheck.click()
            nescheck = nested_check()
        # pass last_img_flag True
        save_multiple(user_name+'/'+'content1.' +
                      str(count_img), elem_img, last_img_flag=1)
    else:
        save_content('_97aPb', user_name+'/'+'content1')
    c = 2
    while True:
        next_el = next_post()
        if not next_el:
            # no "next post" arrow: the last post has been processed
            break
        next_el.click()
        # give the next post time to load
        time.sleep(1.5)
        try:
            multiple_images = nested_check()
            if multiple_images:
                nescheck = multiple_images
                count_img = 0
                while nescheck:
                    elem_img = chrome.find_element_by_class_name('rQDP3')
                    save_multiple(user_name+'/'+'content' +
                                  str(c)+'.'+str(count_img), elem_img)
                    count_img += 1
                    nescheck.click()
                    nescheck = nested_check()
                save_multiple(user_name+'/'+'content'+str(c) +
                              '.'+str(count_img), elem_img, 1)
            else:
                save_content('_97aPb', user_name+'/'+'content'+str(c))
        except selenium.common.exceptions.NoSuchElementException:
            # the media container is missing: nothing more can be saved
            break
        c += 1
# Function to save content of the current post
def save_content(class_name, img_name):
    """Download the video or image of the current post into ``img_name``.

    Args:
        class_name: Class of the element whose inner HTML contains the media.
        img_name: Path of the file to write.

    If the element is not found, a message is printed and nothing is saved.
    """
    try:
        pic = chrome.find_element_by_class_name(class_name)
    except selenium.common.exceptions.NoSuchElementException:
        print("Either This user has no images or you haven't followed this user or something went wrong")
        # without the element there is nothing to download
        return
    html = pic.get_attribute('innerHTML')
    soup = bs(html, 'html.parser')
    # prefer the video source; fall back to the image source
    link = soup.find('video')
    if link:
        link = link['src']
    else:
        link = soup.find('img')['src']
    response = requests.get(link)
    with open(img_name, 'wb') as f:
        f.write(response.content)
# Function to save multiple posts
def save_multiple(img_name, elem, last_img_flag=False):
    """Download one media item of a multi-item post into ``img_name``.

    Args:
        img_name: Path of the file to write.
        elem: Element whose inner HTML contains the ``<ul>`` of media items.
        last_img_flag: When truthy, save the last ``<li>`` of the list;
            otherwise save the middle one.
    """
    markup = elem.get_attribute('innerHTML')
    html = bs(markup, 'html.parser')
    # the first <ul> holds one <li> per media item
    biglist = html.find_all('ul')
    biglist = biglist[0]
    list_images = biglist.find_all('li')
    if last_img_flag:
        user_image = list_images[-1]
    else:
        user_image = list_images[(len(list_images)//2)]
    # prefer the video source; fall back to the image source
    video = user_image.find('video')
    if video:
        link = video['src']
    else:
        link = user_image.find('img')['src']
    response = requests.get(link)
    with open(img_name, 'wb') as f:
        f.write(response.content)
# Function to check if the post is nested
def nested_check():
    """Return the "next item" chevron of the current post, or 0 if absent.

    Returns:
        The element with class ``coreSpriteRightChevron`` when the post has
        a further media item to show, otherwise the falsy value ``0``.
    """
    try:
        nes_nex = chrome.find_element_by_class_name('coreSpriteRightChevron  ')
        return nes_nex
    except selenium.common.exceptions.NoSuchElementException:
        return 0
# Driver Code
# A browser session must exist and the profile page must be loaded before
# login() can look up the page's elements.
path()
try:
    url_name(url)
    login(username, password)
    download_allposts()
finally:
    # always release the browser, even if a step above fails
    chrome.quit()

Después de ejecutar este script completo, se creará un directorio que contendrá todas las publicaciones.


Nota: si es usuario de Windows, las publicaciones se guardarán con la extensión .file; ábralas con una aplicación que pueda abrir tanto imágenes como videos (cada publicación de Instagram contiene un solo tipo de medio: imagen o video).

Artículo escrito por UnworthyProgrammer y traducido por Barcelona Geeks.

