regex - How to get all text in between two numbers using regular expression in Python? -


I have text in the following format:

text

all eyez on me track listing # title artisttime 1 ambitionz az ridah 2pac 4:39 2 u 2pac 4:37       fatal           yani hadati           dru down           snoop dogg           nair dogg           nate dogg     3 skandalouz 2pac 4:09       nate dogg     4 got mind made 2pac 5:13       kurupt           redman           method man           dat n daz     5 how want jojo elf 4:47       2pac     6 2 of amerikaz wanted 2pac 4:07       snoop dogg     7 no more pain 2pac 6:14 8 heartz of men 2pac 4:43 9 life goes on 2pac 5:02 10 god can judge me rappin' 4-tay 4:57       2pac     11 tradin war stories nair dogg 5:29       storm           cpo           c-bo           outlawz           2pac     12 california love [remix] dr. dre 6:25       2pac           roger     13 ain't mad @ cha 2pac 4:53       danny boy     14 what'z ya phone no. danny boy 5:10       2pac     15 (2) can't c me george clinton 5:30       2pac     16 (2) shorty wanna thug 2pac 3:51 17 (2) holla @ me 2pac 4:56 18 (2) wonda why call u b____ 2pac 4:19 19 (2) when ride nair dogg 5:09       2pac     20 (2) thug passion outlawz 5:08       storm           dramarydal           jewell           2pac     21 (2) picture me rollin' danny boy 5:15       2pac           cpo           big syke     22 (2) check out time big syke 4:39       kurupt           2pac     23 (2) ratha ya n____ 2pac 4:14       richie rich     24 (2) eyez on me big syke 5:08       2pac     25 (2) run tha streetz storm 5:17       nair dogg           michel'le           2pac     26 (2) ain't hard 2 find b-legit 4:29       e-40           c-bo           2pac           richie rich     27 (2) heaven ain't hard 2 find 2pac 3:58

From this I need to obtain the titles of the songs.

So far, I have:

def extraction():
    """Print the intermediate and final results of a regex-based clean-up.

    The track listing is hard-coded below.  The function:

    1. collapses all whitespace runs to single spaces,
    2. captures every chunk of text that precedes a ':' (dropping up to
       three leading digits) and joins the chunks with spaces,
    3. splits the result into one-alphanumeric-character matches (with their
       surrounding whitespace), strips the whitespace from the matches that
       carried whitespace on both sides, and re-inserts a single space after
       each of them,
    4. runs a final greedy regex over the rebuilt string.

    Everything is written to stdout; nothing is returned.  Note that the
    final result is still one big string rather than a list of song titles.
    """
    # Adjacent literals are concatenated; the exact amount of whitespace is
    # irrelevant because it is normalised right below.
    text = (
        'text eyez on me track listing # title artisttime        1 ambitionz az ridah  2pac 4:39'
        '       2 u  2pac 4:37              fatal                 yani hadati                 '
        'dru down                 snoop dogg                 nair dogg                 nate dogg          '
        '3 skandalouz  2pac 4:09              nate dogg          4 got mind made  2pac 5:13              '
        'kurupt                 redman                 method man                 dat n daz          '
        '5 how want  jojo elf 4:47              2pac          6 2 of amerikaz wanted  '
        '2pac 4:07              snoop dogg          7 no more pain  2pac 6:14       8 heartz of men  2pac 4:43       '
        '9 life goes on  2pac 5:02       10 god can judge me  rappin 4-tay 4:57              2pac          '
        '11 tradin war stories  nair dogg 5:29              storm                 cpo                 c-bo'
        '                 outlawz                 2pac          12 california love [remix]  dr. dre 6:25              '
        '2pac                 roger          13 aint mad @ cha  2pac 4:53              danny boy          '
        '14 whatz ya phone no.  danny boy 5:10              2pac          15 (2) cant c me  george clinton 5:30'
        '              2pac          16 (2) shorty wanna thug  2pac 3:51       17 (2) holla @ me  2pac 4:56'
        '       18 (2) wonda why call u b____  2pac 4:19       19 (2) when ride  nair dogg 5:09'
        '              2pac          20 (2) thug passion  outlawz 5:08              storm                 '
        'dramarydal                 jewell                 2pac          21 (2) picture me rollin  danny boy 5:15'
        '              2pac                 cpo                 big syke          22 (2) check out time  '
        'big syke 4:39              kurupt                 2pac          23 (2) ratha ya n____  2pac 4:14'
        '              richie rich          24 (2) eyez on me  big syke 5:08              2pac          '
        '25 (2) run tha streetz  storm 5:17              nair dogg                 michelle                 '
        '2pac          26 (2) aint hard 2 find  b-legit 4:29              e-40                 c-bo                 '
        '2pac                 richie rich          27 (2) heaven aint hard 2 find  2pac 3:58'
    )

    st = " ".join(text.split())
    # Each capture is the text leading up to a ':'; the minutes digit stays
    # at the end of a capture, the seconds are eaten by \d{0,3}.
    songs = re.findall(r'\d{0,3}(.+?):', st, re.I | re.M)
    s = " ".join(songs)
    s = s.replace("\xc2\xa0", " ")
    # The string is printed twice, exactly as the original debugging output.
    print(s)
    print(s)

    # One alphanumeric character per match, plus any whitespace around it.
    t = re.findall(r'\s*[a-zA-Z0-9]\s*', s, re.I | re.M)

    # Positions of matches that carry whitespace on both sides (len > 2).
    y = [pos for pos, item in enumerate(t) if len(item) > 2]
    print(y)

    # Strip the whitespace from those matches: split on spaces, drop
    # whitespace characters from each piece, then drop the empty pieces.
    l = []
    for pos in y:
        for val in t[pos].split(' '):
            l.append("".join(ch for ch in val if ch.strip()))
    l = [val for val in l if val.strip()]

    # Replace every padded match by its bare character.
    for k, vals in enumerate(y):
        print(vals)
        t[vals] = l[k]
    print(" ".join([t[20], t[33], t[38], t[48]]))

    # Re-insert one space after each replaced match; going backwards keeps
    # the remaining positions in y valid while the list grows.
    for vals in reversed(y):
        t.insert(vals + 1, ' ')
    t = ''.join(t)
    t = re.findall(r'\d{0,3}\s*(.+)\s*\d', t, re.I | re.M)
    print(t)

which returns a string like this:

['text eyez on me track listing  title artisttime 1 ambitionz az ridah 2pac 4 2 u 2pac 4 fatal yani hadati dru down snoop dogg nair dogg nate dogg 3 skandalouz 2pac 4 nate dogg 4 got mind made 2pac 5 kurupt redman method man dat n daz 5 how want jojo elf 4 2pac 6 2 of amerikaz wanted 2pac 4 snoop dogg 7 no more pain 2pac 6 8 heartz of men 2pac 4 9 life goes on 2pac 5 10 god can judge me rappin 4tay 4 2pac 11 tradin war stories nair dogg 5 storm cpo cbo outlawz 2pac 12 california love remix dr dre 6 2pac roger 13 aint mad @ cha 2pac 4 danny boy 14 whatz ya phone no danny boy 5 2pac 15 2 cant c me george clinton 5 2pac 16 2 shorty wanna thug 2pac 3 17 2 holla @ me 2pac 4 18 2 wonda why call u b 2pac 4 19 2 when ride nair dogg 5 2pac 20 2 thug passion outlawz 5 storm dramarydal jewell 2pac 21 2 picture me rollin danny boy 5 2pac cpo big syke 22 2 check out time big syke 4 kurupt 2pac 23 2 ratha ya n 2pac 4 richie rich 24 2 eyez on me big syke 5 2pac 25 2 run tha streetz storm 5 nair dogg michelle 2pac 26 2 aint hard 2 find blegit 4 e40 cbo 2pac richie rich 27 2 heaven aint hard 2 find 2pac '] 

I want to get the text between the numbers and then filter it to find the songs. Is there a better way to get the titles of the songs as a list?

Why over-stress `re`?

import json
import re

blah = """all eyez on me track listing # title artisttime 1 ambitionz az ridah 2pac 4:39 2 u 2pac 4:37       fatal           yani hadati           dru down           snoop dogg           nair dogg           nate dogg     3 skandalouz 2pac 4:09       nate dogg     4 got mind made 2pac 5:13       kurupt           redman           method man           dat n daz     5 how want jojo elf 4:47       2pac     6 2 of amerikaz wanted 2pac 4:07       snoop dogg     7 no more pain 2pac 6:14 8 heartz of men 2pac 4:43 9 life goes on 2pac 5:02 10 god can judge me rappin' 4-tay 4:57       2pac     11 tradin war stories nair dogg 5:29       storm           cpo           c-bo           outlawz           2pac     12 california love [remix] dr. dre 6:25       2pac           roger     13 ain't mad @ cha 2pac 4:53       danny boy     14 what'z ya phone no. danny boy 5:10       2pac     15 (2) can't c me george clinton 5:30       2pac     16 (2) shorty wanna thug 2pac 3:51 17 (2) holla @ me 2pac 4:56 18 (2) wonda why call u b____ 2pac 4:19 19 (2) when ride nair dogg 5:09       2pac     20 (2) thug passion outlawz 5:08       storm           dramarydal           jewell           2pac     21 (2) picture me rollin' danny boy 5:15       2pac           cpo           big syke     22 (2) check out time big syke 4:39       kurupt           2pac     23 (2) ratha ya n____ 2pac 4:14       richie rich     24 (2) eyez on me big syke 5:08       2pac     25 (2) run tha streetz storm 5:17       nair dogg           michel'le           2pac     26 (2) ain't hard 2 find b-legit 4:29       e-40           c-bo           2pac           richie rich     27 (2) heaven ain't hard 2 find 2pac 3:58 extra"""

# A track ends with its running time, e.g. "4:39" or "12:05".
_DURATION_RE = re.compile(r'\d{1,2}:\d{2}')


def extraction2(s):
    """Split a flattened track listing into one record per track.

    Tracks are expected to be numbered consecutively starting at 1 and to
    look like ``<number> <title and artist> <m:ss>``.  Text found between
    the end of one track and the number of the next one (additional
    artists) is appended to the title of the preceding track, separated by
    a single space.  Text in front of track 1 (the header) is ignored.

    Args:
        s: The raw listing; any run of whitespace counts as one space.

    Returns:
        A list of dicts with the keys ``"trackno"`` (int), ``"title"``
        (str, title plus artists) and ``"duration"`` (str such as
        ``"4:39"``).  The list is empty when no track 1 is found.
    """
    s = re.sub(r'\s+', " ", s)
    tracks = []
    trackno = 1
    while True:
        # The track number must be a token of its own, otherwise "2" would
        # also be found inside artist names such as "2pac" or "e-40".
        number = re.search(r'(?<!\S)%d(?!\S)' % trackno, s)
        duration = _DURATION_RE.search(s, number.end()) if number else None
        if duration is None:
            # No further complete track: whatever is left over belongs to
            # the last track (additional artists after the final title).
            leftover = s.strip()
            if tracks and leftover:
                tracks[-1]["title"] += " " + leftover
            break

        # Add the "trailing" artists to the previous track.
        trailing = s[:number.start()].strip()
        if tracks and trailing:
            tracks[-1]["title"] += " " + trailing

        tracks.append({
            "trackno": trackno,
            "title": s[number.end():duration.start()].strip(),
            "duration": duration.group(),
        })
        # The running time marks the end of the track; continue behind it.
        s = s[duration.end():]
        trackno += 1
    return tracks


tracklist = extraction2(blah)
print(json.dumps(tracklist, indent=4))

While I agree with @Wiktor Stribiżew, I love puzzles ;)

A remark regarding the original code: it is not good style to use the name of the built-in type `str` as a variable or parameter name.


Comments

Popular posts from this blog

php - Wordpress website dashboard page or post editor content is not showing but front end data is showing properly -

javascript - Get parameter of GET request -

javascript - Twitter Bootstrap - how to add some more margin between tooltip popup and element -