# BeautifyMP3/addMetadata.py
#
# 129 lines
# 3.8 KiB
# Python

import sys
import os
from os.path import basename
import spotipy
import spotipy.oauth2 as oauth2
import re
from titlecase import titlecase
import requests
from bs4 import BeautifulSoup
def improve_song_names(songs):
    """Turn raw audio file names into cleaner search strings.

    For every name the function strips leading track numbering, drops the
    file extension, replaces bracket/punctuation characters with spaces,
    removes common noise words ("official", "video", "lyrics", ...) and
    collapses runs of whitespace.

    Args:
        songs: Iterable of file names (e.g. ``"01 - Baby (Official Video).mp3"``).

    Returns:
        A list of cleaned names, in the same order as *songs*.
    """
    char_filters = "()[]{}-:_/=+\"'"
    word_filters = ('lyrics', 'lyric', 'by', 'video', 'official', 'hd', 'dirty', 'with', 'feat', 'original', 'mix',
                    'www', 'com', 'mp3', 'audio', 'remixed', 'remix', 'full', 'version', 'music', 'hq', 'uploaded',
                    'explicit')

    # Map every filtered character to a space in a single pass.
    separators = str.maketrans(char_filters, " " * len(char_filters))
    # Word boundaries keep the filter from eating parts of real words
    # (without them "Baby" lost its "by" and "Compass" lost its "com").
    noise_words = re.compile(
        r"\b(?:" + "|".join(re.escape(word) for word in word_filters) + r")\b",
        flags=re.IGNORECASE)
    # A trailing ".xyz" of 2-4 alphanumerics is treated as the file extension;
    # names without one are left intact instead of losing their last 4 chars.
    extension = re.compile(r"\.[A-Za-z0-9]{2,4}$")

    improved_names = []
    for song in songs:
        # Leading digits, dots, dashes and spaces are track numbering.
        song = song.strip().lstrip("0123456789.- ")
        song = extension.sub("", song)
        song = song.translate(separators)
        song = noise_words.sub("", song)
        improved_names.append(' '.join(song.split()))
    return improved_names
def get_song_name(title, artist):
    """Build the "<title> - <artist>" string used as a search query."""
    parts = (title, artist)
    return ' - '.join(parts)
def get_lyrics_genius(song_name):
    """Fetch the lyrics for *song_name* from Genius.

    The function searches the Genius API, takes the first hit, asks the API
    for that song's web page path and then scrapes the lyrics text out of
    the page's ``<div class="lyrics">`` element.

    Args:
        song_name: Free-text search query (e.g. ``"Hello - Adele"``).

    Returns:
        The lyrics text with its line breaks preserved, or ``None`` when the
        search yields no hit or the page has no lyrics container.
    """
    # SECURITY: this API token is committed in source. It is kept as a
    # fallback so existing setups keep working, but it should be rotated and
    # supplied through the GENIUS_KEY environment variable instead.
    genius_key = os.environ.get(
        "GENIUS_KEY",
        "TxvC3bwsONZ1lh3j8Idq-VFBXKk5SlwYoAyGwkWFvbqkJB8vwdMGV7b5Q4sJSTd2")
    base_url = "https://api.genius.com"
    headers = {'Authorization': 'Bearer %s' % genius_key}

    # `params` puts the query in the URL; `data` would send it as the body
    # of the GET request.
    response = requests.get(
        base_url + "/search", params={'q': song_name}, headers=headers)
    try:
        api_path = response.json()['response']['hits'][0]['result']['api_path']
    except (KeyError, IndexError):
        # IndexError: the search succeeded but returned an empty hit list.
        print("Could not find lyrics")
        return None

    response = requests.get(base_url + api_path, headers=headers)
    song_path = response.json()['response']['song']['path']

    page = requests.get("http://genius.com" + song_path)
    html = BeautifulSoup(page.text, "html.parser")
    # Remove script tags that they put in the middle of the lyrics.
    for script in html('script'):
        script.extract()

    lyrics_div = html.find("div", class_="lyrics")
    if lyrics_div is None:
        # Page layout without the expected container: report, don't crash.
        print("Could not find lyrics")
        return None
    # The original called lyrics.replace('\n', ' ') and discarded the result,
    # so callers have always received the text with newlines intact; that
    # observable behaviour is kept and the no-op statement is dropped.
    return lyrics_div.get_text()
def get_metadata_spotify(spotify, song_name):
    """Collect tag metadata for *song_name* from Spotify (plus Genius lyrics).

    Args:
        spotify: Client object exposing ``search``, ``album`` and ``artist``
            (a ``spotipy.Spotify`` instance in this file).
        song_name: Free-text search query for the track.

    Returns:
        A dict with the keys ``title``, ``artist``, ``album``,
        ``album_artist``, ``release-date``, ``track_num``, ``disc_num``,
        ``albumart`` (cover URL, or ``None`` if the album has no images) and
        ``lyrics`` (whatever ``get_lyrics_genius`` returns). ``genre`` is
        present only when the album or, failing that, the first artist
        lists at least one genre.

    Raises:
        IndexError: If the search returns no track for *song_name*.
    """
    items = spotify.search(song_name, limit=1)['tracks']['items']
    if not items:
        # Same exception type as the bare [0] lookup used to raise, but with
        # a message that says which query failed.
        raise IndexError("No Spotify track found for %r" % (song_name,))
    meta_tags = items[0]
    album = meta_tags['album']
    first_artist = meta_tags['artists'][0]

    metadata = {}
    metadata['title'] = meta_tags['name']
    metadata['artist'] = first_artist['name']
    metadata['album'] = album['name']
    metadata['album_artist'] = album['artists'][0]['name']

    album_meta_tags = spotify.album(album['id'])
    metadata['release-date'] = album_meta_tags['release_date']

    # Prefer the album's genres; only query the artist when the album has
    # none. If neither lists a genre the key is simply left out.
    genres = album_meta_tags['genres']
    if not genres:
        genres = spotify.artist(first_artist['id'])['genres']
    if genres:
        metadata['genre'] = titlecase(genres[0])

    metadata['track_num'] = meta_tags['track_number']
    metadata['disc_num'] = meta_tags['disc_number']

    # An album without cover images should not abort the whole lookup.
    images = album['images']
    metadata['albumart'] = images[0]['url'] if images else None

    metadata['lyrics'] = get_lyrics_genius(
        get_song_name(metadata['title'], metadata['artist']))
    return metadata
def list_files():
    """Return the names of the MP3 files in the current working directory.

    The extension check is case-insensitive, so ``track.MP3`` is included
    as well as ``track.mp3``. Names are returned in ``os.listdir`` order.
    """
    return [f for f in os.listdir('.') if f.lower().endswith('.mp3')]
def main():
    """Look up Spotify/Genius metadata for one MP3 in the current directory.

    Lists the MP3 files in the working directory, cleans their names and
    fetches metadata for the file at a fixed position in that list. The
    fetched metadata is currently not used any further.
    """
    # SECURITY: these credentials are committed in source. They are kept as
    # fallbacks so existing setups keep working, but they should be rotated
    # and supplied through SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET instead.
    auth = oauth2.SpotifyClientCredentials(
        client_id=os.environ.get(
            'SPOTIPY_CLIENT_ID', '622a0e16a4914e3eadc2a37b4a134f1e'),
        client_secret=os.environ.get(
            'SPOTIPY_CLIENT_SECRET', '6fe008a8b7754954a58a9849fa3172df'))
    # Hand the credentials manager to the client instead of a token fetched
    # by hand: the return type of get_access_token() differs between spotipy
    # versions, and the manager also refreshes expired tokens.
    spotify = spotipy.Spotify(client_credentials_manager=auth)

    files = list_files()
    improved_names = improve_song_names(files)

    # Position of the file to process (hard-coded in the original script).
    song_index = 19
    if song_index >= len(improved_names):
        print("Expected at least %d mp3 files in the current directory, found %d"
              % (song_index + 1, len(improved_names)))
        return
    get_metadata_spotify(spotify, improved_names[song_index])
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()