From a1ebb15b286f36a79fa5168dcfb8dd8efca76f58 Mon Sep 17 00:00:00 2001 From: alistairjcbrown <635903+alistairjcbrown@users.noreply.github.com> Date: Wed, 27 May 2026 09:34:56 +0000 Subject: [PATCH] Update title normalization test --- common/known-removable-phrases.js | 4 + common/normalize-title.js | 4 +- common/tests/test-titles.json | 588 ++++++++++++++++++++++++++++++ 3 files changed, 595 insertions(+), 1 deletion(-) diff --git a/common/known-removable-phrases.js b/common/known-removable-phrases.js index f84227da..7807d234 100644 --- a/common/known-removable-phrases.js +++ b/common/known-removable-phrases.js @@ -50,6 +50,7 @@ const knownRemovablePhrases = [ "(SRX) ", "(SCX) ", "(Screen X) ", + "(Screening) ", "(3D) ", "(3D):", "Live In 3D", @@ -119,6 +120,7 @@ const knownRemovablePhrases = [ " Watch Party", ": Live Q&A with Director Kevin Macdonald", ": LIVE VIEWING", + "LIVE VIEWING IN BUSAN", ": IMMERSION IN CINEMAS", "VR CONCERT : IMMERSION", ": Chicken Jockey Screening", @@ -892,6 +894,7 @@ const knownRemovablePhrases = [ "Records, cocktails: ", "Refugee Week:", "REFUGEE WEEK FREE EVENT:", + "REProduce Viewing Room:", "Rob Reiner Celebration", "Roman party, divine chorals: ", "Outdoor Cinema:", @@ -1090,6 +1093,7 @@ const knownRemovablePhrases = [ "Parallel histories of queer revolt", "Tezuka's", "Praavinkoodu ", + "Prickly pear cocktails, ", "/Minus Colour", "/ Minus Colour", "Aleksandar Petrović's ", diff --git a/common/normalize-title.js b/common/normalize-title.js index 6f16eb84..ad5ee79f 100644 --- a/common/normalize-title.js +++ b/common/normalize-title.js @@ -720,7 +720,7 @@ function normalizeTitle(title, options) { ["Andre Rieu - ", "Andre Rieu: "], ["Andre Rieu: Summer 2026:", "Andre Rieu's 2026 Summer Concert:"], [" + UK Premiere: Replikka", " + Replikka"], - [/ \+ Iggy Pop [–�] Lust for life/i, " & Lust for life"], + [/ [+&] Iggy Pop [–\-�] Lust for life/i, " & Lust for life"], [ /Bluey At The Cinema - Playdates$/i, "Bluey At The Cinema: Playdates with Friends", @@ -732,6 +732,7 @@ function normalizeTitle(title, options) { [" - Oggi", ": Oggi"], [" x metropolis", " metropolis"], ["Nick Drake: A Skin Too Few", "A Skin Too Few: The Days of Nick Drake"], + ["(Screening) / ", "(Screening) & "], // Variant families collapsed from known-removable-phrases.js // Each pattern covers multiple near-identical string entries that shared a common structure [/dog[- ]?friendly(?:\s+screening)?[:\s]*/i, ""], @@ -746,6 +747,7 @@ function normalizeTitle(title, options) { [/l.ff(?:\s+202\d)?:/i, ""], [/liaf(?:\s+\d+)?:/i, ""], [/hkff(?:uk)?(?:\s+\d+)?:/i, ""], + [/ AV SHOW$/i, ""], ["Tercera Video Club #2 - ", "Tercera Video Club #2: "], [ "Argentine season launch: Live music + ", diff --git a/common/tests/test-titles.json b/common/tests/test-titles.json index d13a1b0a..22accfc8 100644 --- a/common/tests/test-titles.json +++ b/common/tests/test-titles.json @@ -72518,5 +72518,593 @@ { "input": "Above And Below The Line Exhibition Launch Event: Italian Women In The Film Industry, Past & Present", "output": "above below the line exhibition launch event italian women in the film industry past present" + }, + { + "input": "CARERS & BABIES: TUNER", + "output": "tuner" + }, + { + "input": "EXHIBITION ON SCREEN: JAMES McNEILL WHISTLER", + "output": "james mcneill whistler" + }, + { + "input": "EXHIBITION ON SCREEN: RENOIR & LOVE", + "output": "renoir love" + }, + { + "input": "Film House", + "output": "film house" + }, + { + "input": "Stage, Screen & Social Change with Compass Collective", + "output": "stage screen social change with compass collective" + }, + { + "input": "25th Anniversary screening – Lagaan: Once Upon a Time in India + intro", + "output": "lagaan once upon a time in india" + }, + { + "input": "Rose of Nevada + live score performed by The Cornish Sound Unit", + "output": "rose of nevada" + }, + { + "input": "50th Anniversary Screening: Bugsy Malone + Q&A", + "output": "bugsy malone" + }, + { + "input": "A Better Tomorrow + intro by Dr Lin Feng, University of Leicester", + "output": "a better tomorrow" + }, + { + "input": "A Better Tomorrow II", + "output": "a better tomorrow ii" + }, + { + "input": "All About Eve + intro by film and cultural events programmer Caroline Cassin", + "output": "all about eve" + }, + { + "input": "As Young as You Feel", + "output": "as young as you feel" + }, + { + "input": "As Young as You Feel + intro", + "output": "as young as you feel" + }, + { + "input": "Bhaji on the Beach", + "output": "bhaji on the beach" + }, + { + "input": "Bhaji on the Beach + intro by Bhavini Goyate, BFI Inclusion Manager", + "output": "bhaji on the beach" + }, + { + "input": "Blue Black Permanent + intro", + "output": "blue black permanent" + }, + { + "input": "Breaking Away + pre-recorded intro by Philip Dobson, founder of upCYCLE LDN", + "output": "breaking away" + }, + { + "input": "Bullet in the Head + intro by film critic Katie Smith-Wong", + "output": "bullet in the head" + }, + { + "input": "Bus Stop + intro", + "output": "bus stop" + }, + { + "input": "Candy Mountain", + "output": "candy mountain" + }, + { + "input": "Candy Mountain + intro by Jason Wood, BFI Executive Director for Public Programmes and Audiences", + "output": "candy mountain" + }, + { + "input": "Clash by Night", + "output": "clash by night" + }, + { + "input": "Clash by Night + intro by season curator Kimberley Sheehan", + "output": "clash by night" + }, + { + "input": "Don’t Bother to Knock", + "output": "dont bother to knock" + }, + { + "input": "Don’t Bother to Knock + intro", + "output": "dont bother to knock" + }, + { + "input": "Eve and Marilyn + Q&A with Eve Arnold’s grandson, Michael Arnold", + "output": "eve marilyn" + }, + { + "input": "Face/Off", + "output": "face/off" + }, + { + "input": "Funday: Charlie and Lola – 25th Anniversary + Q&A with Lauren Child", + "output": "charlie lola" + }, + { + "input": "Gentlemen Prefer Blondes + intro", + "output": "gentlemen prefer blondes" + }, + { + "input": "Goodness Gracious Me Reunion + Q&A with cast", + "output": "goodness gracious me reunion" + }, + { + "input": "Happy Together + intro by curator and writer Ying-Di Yin", + "output": "happy together" + }, + { + "input": "Hard Boiled + intro by Xavier Alexandre Pillai, BFI TV Programmer", + "output": "hard boiled" + }, + { + "input": "Hard Target", + "output": "hard target" + }, + { + "input": "How to Marry a Millionaire + intro", + "output": "how to marry a millionaire" + }, + { + "input": "India’s AI & Film Future + discussion", + "output": "indias ai film future" + }, + { + "input": "Ish", + "output": "ish" + }, + { + "input": "It’s Love Again + intro by Maureen Footer", + "output": "its love again" + }, + { + "input": "John Woo Double Bill: Red Cliff Part I + Red Cliff Part II", + "output": "john woo double bill red cliff part i" + }, + { + "input": "Kings of the Road", + "output": "kings of the road" + }, + { + "input": "Ladies of the Chorus", + "output": "ladies of the chorus" + }, + { + "input": "Ladies of the Chorus + intro by season curator Kimberley Sheehan", + "output": "ladies of the chorus" + }, + { + "input": "Library Talk: Lost Star – The Rediscovery of the Transnational Stardom of Belinda Lee", + "output": "lost star the rediscovery of the transnational stardom of belinda lee" + }, + { + "input": "LIFF Opening Night – European Premiere: 52 Blue + intro and Q&A with cast and crew", + "output": "52 blue" + }, + { + "input": "Little Amelie", + "output": "little amelie" + }, + { + "input": "Mad Max 2", + "output": "mad max 2" + }, + { + "input": "Marilyn in Her Own Words... + intro", + "output": "marilyn in her own words" + }, + { + "input": "Marilyn Monroe Study Day", + "output": "marilyn monroe study day" + }, + { + "input": "Marilynspirations Double Bill", + "output": "marilynspirations double bill" + }, + { + "input": "Member exclusive: The Girls", + "output": "the girls" + }, + { + "input": "Member Picks: Fiddler on the Roof", + "output": "fiddler on the roof" + }, + { + "input": "Missing Believed Wiped: Mid-Summer Special", + "output": "missing believed wiped midsummer special" + }, + { + "input": "Murder on the Orient Express", + "output": "murder on the orient express" + }, + { + "input": "Murder on the Orient Express + intro with writer Sophie Hannah, hosted by Kazuo Ishiguro", + "output": "murder on the orient express" + }, + { + "input": "New Brit-Asian Shorts + Q&A", + "output": "new britasian shorts" + }, + { + "input": "New Writings: Feel the Floor – Restoring the Life and Legacy of Jazz Choreographer Buddy Bradley", + "output": "new writings feel the floor restoring the life legacy of jazz choreographer buddy bradley" + }, + { + "input": "New Writings: Hard Streets – Working Class Lives in Charlie Chaplin’s London with Jacqueline Riding", + "output": "new writings hard streets working class lives in charlie chaplins london with jacqueline riding" + }, + { + "input": "Night Train", + "output": "night train" + }, + { + "input": "Night Train + intro with Professor Philip Horne, hosted by Kazuo Ishiguro", + "output": "night train" + }, + { + "input": "North by Northwest", + "output": "north by northwest" + }, + { + "input": "Nostalgia", + "output": "nostalgia" + }, + { + "input": "Once a Thief", + "output": "once a thief" + }, + { + "input": "Preview: Ish + Q&A with co-writer, director and composer Imran Perretta + cast (tbc)", + "output": "ish" + }, + { + "input": "Radio On + pre-recorded intro by director Christopher Petit", + "output": "radio on" + }, + { + "input": "Relaxed screening: Trains! + intro and discussion", + "output": "trains" + }, + { + "input": "Rome Express", + "output": "rome express" + }, + { + "input": "Rome Express + intro with writer Jonathan Coe, hosted by writer and season curator Kazuo Ishiguro", + "output": "rome express" + }, + { + "input": "Runaway Train", + "output": "runaway train" + }, + { + "input": "Season Introduction Station to Station: Kazuo Ishiguro’s Top Ten Train Films", + "output": "season introduction station to station kazuo ishiguros top ten train films" + }, + { + "input": "Seniors’ Free Matinee: After Hours + intro", + "output": "after hours" + }, + { + "input": "Seniors’ Matinee: Blue Heron", + "output": "blue heron" + }, + { + "input": "Shanghai Express + intro by writer and season curator Kazuo Ishiguro", + "output": "shanghai express" + }, + { + "input": "Some Like It Hot + intro", + "output": "some like it hot" + }, + { + "input": "Sound and Vision: Films by Helen Petts + Helen Petts in conversation with musician Steve Beresford", + "output": "sound vision films by helen petts" + }, + { + "input": "Stranger Than Paradise", + "output": "stranger than paradise" + }, + { + "input": "Stranger Than Paradise + intro", + "output": "stranger than paradise" + }, + { + "input": "The Seven Year Itch + intro by Ruby McGuigan, BFI Programme Development Manager", + "output": "seven year itch" + }, + { + "input": "Tickets", + "output": "tickets" + }, + { + "input": "TransSiberian", + "output": "transsiberian" + }, + { + "input": "TV Preview: The Undeclared War Series 2 + Q&A with cast and creator (tbc)", + "output": "undeclared war series 2" + }, + { + "input": "UK premiere of 4K restoration: 3 Bad Men + intro by Bryony Dixon, Rosie Taylor and Makeda Doyal", + "output": "3 bad men" + }, + { + "input": "UK Premiere of 4K Restoration: Bashu, the Little Stranger + intro by film curator Ehsan Khoshbakht", + "output": "bashu the little stranger" + }, + { + "input": "UK Premiere of 4K Restoration: In Which Annie Gives It Those Ones + Q&A", + "output": "in which annie gives it those ones" + }, + { + "input": "UK Premiere: In Search of the Sky + discussion", + "output": "in search of the sky" + }, + { + "input": "What Could Have Been: Something’s Got to Give", + "output": "what could have been somethings got to give" + }, + { + "input": "White Snow", + "output": "white snow" + }, + { + "input": "Pocahontas", + "output": "pocahontas" + }, + { + "input": "He Ran All the Way (1951)", + "output": "he ran all the way (1951)" + }, + { + "input": "£2 Family Films : Time Hoppers: The Silk Road", + "output": "time hoppers the silk road" + }, + { + "input": "Athidhi (Re-release) (Telugu)", + "output": "athidhi" + }, + { + "input": "Blast (Tamil)", + "output": "blast" + }, + { + "input": "Chardikala (Punjabi)", + "output": "chardikala" + }, + { + "input": "Drishyam 3 - Cut Version (Malayalam)", + "output": "drishyam 3" + }, + { + "input": "El Kalam Ala Eh (Arabic)", + "output": "el kalam ala eh" + }, + { + "input": "Peddi (Hindi)", + "output": "peddi" + }, + { + "input": "Lambeth Mutual Aid Screening for Palestine & Lebanon", + "output": "lambeth mutual aid screening for palestine lebanon" + }, + { + "input": "Star Wars: The Mandalorian and Grogu (3D Version)", + "output": "star wars the mandalorian grogu" + }, + { + "input": "My Father's Island", + "output": "my fathers island" + }, + { + "input": "Portrait of a Lady on Fire (4K Re-release)", + "output": "portrait of a lady on fire" + }, + { + "input": "Free Willy", + "output": "free willy" + }, + { + "input": "Space Jam", + "output": "space jam" + }, + { + "input": "RBO Cinema Season 2026-27: Swan Lake", + "output": "royal ballet opera 2026 swan lake" + }, + { + "input": "RBO Cinema Season 2026-27: The Nutcracker", + "output": "royal ballet opera 2026 the nutcracker" + }, + { + "input": "Relaxed Screening: Toy Story 5", + "output": "toy story 5" + }, + { + "input": "Royal Ballet and Opera: Alice’s Adventures in Wonderland", + "output": "royal ballet opera 2025 adventures in wonderland" + }, + { + "input": "Bat Out of Hell - The Musical (2025)", + "output": "bat out of hell the musical (2025)" + }, + { + "input": "Handa Opera on Sydney Harbour", + "output": "handa opera on sydney harbour" + }, + { + "input": "The Dude", + "output": "the dude" + }, + { + "input": "We're Nothing At All", + "output": "were nothing at all" + }, + { + "input": "Babykino: Eagles of the Republic", + "output": "eagles of the republic" + }, + { + "input": "Babykino: Enzo", + "output": "enzo" + }, + { + "input": "Babykino: My Mother's Wedding", + "output": "my mothers wedding" + }, + { + "input": "Babykino: The Christophers", + "output": "the christophers" + }, + { + "input": "Bad Shabbos", + "output": "bad shabbos" + }, + { + "input": "Familiar Touch", + "output": "familiar touch" + }, + { + "input": "The Bride of Belsen", + "output": "bride of belsen" + }, + { + "input": "BTS World Tour 'Arirang' in Busan: Live Viewing", + "output": "bts world tour arirang" + }, + { + "input": "El Kalam Ala Eh", + "output": "el kalam ala eh" + }, + { + "input": "BTS WORLD TOUR 'ARIRANG' LIVE VIEWING IN BUSAN", + "output": "bts world tour arirang" + }, + { + "input": "Chardikala", + "output": "chardikala" + }, + { + "input": "Disclosure Day (70mm)", + "output": "disclosure day" + }, + { + "input": "Drishyam 3 (Malayalam) (12A version)", + "output": "drishyam 3" + }, + { + "input": "Patients Not Passports / Refugee Week Film Screening", + "output": "patients not passports" + }, + { + "input": "Erupcja + Q&A", + "output": "erupcja" + }, + { + "input": "Met Opera Live 2026-27: Parsifal", + "output": "metropolitan opera parsifal" + }, + { + "input": "FOR THE LOVE OF ROMCOMS - A SELECTION OF COMEDY SHORTS", + "output": "for the love of romcoms" + }, + { + "input": "CinemaItaliaUK Special Screening of Giulio Regeni, All The Evil In The World + Introduction", + "output": "giulio regeni all the evil in the world" + }, + { + "input": "C&B: Fairyland", + "output": "fairyland" + }, + { + "input": "DJ YODA GOES TO THE MOVIES AV SHOW", + "output": "dj yoda goes to the movies" + }, + { + "input": "Hackney Art Week: LOVE INFINITY + Intro", + "output": "love infinity" + }, + { + "input": "Pink Palace: VEGAS IN SPACE", + "output": "vegas in space" + }, + { + "input": "TV Party, Tonight presents: ENO (1973) & Iggy Pop - Lust for life (1986)", + "output": "eno (1973) lust for life (1986)" + }, + { + "input": "TV PARTY, TONIGHT!: Saul Bass - The Art of The Title + Shorts", + "output": "tv party tonight saul bass" + }, + { + "input": "Fundraiser Q&A: Madfabulous", + "output": "madfabulous" + }, + { + "input": "Special Preview: Erupcja + Q&A", + "output": "erupcja" + }, + { + "input": "Impact Films: how to make compelling films that create change", + "output": "impact films how to make compelling films that create change" + }, + { + "input": "Bhowani Junction: 70th Anniversary", + "output": "bhowani junction" + }, + { + "input": "City Lights presents Bend It Like Beckham", + "output": "bend it like beckham" + }, + { + "input": "Days and Nights in the Forest: New Restoration + Pre-recorded introduction by Wes Anderson", + "output": "days nights in the forest new restoration" + }, + { + "input": "John the Violent", + "output": "john the violent" + }, + { + "input": "London Animation Club presents ARCHIVES, COLLAGE & CHAOS: the films of Joan Ashworth & Laurie Hill", + "output": "archives collage chaos the films of joan ashworth laurie hill" + }, + { + "input": "Prickly pear cocktails, Cactus Pears + Q&A", + "output": "cactus pears" + }, + { + "input": "Taiyupian: Xue Pinggui and Wang Baochuan + Intro", + "output": "taiyupian xue pinggui wang baochuan" + }, + { + "input": "The Cup (Phörpa) + Live Music", + "output": "the cup" + }, + { + "input": "The Girls (Gehenu Lamai): New Restoration", + "output": "the girls (gehenu lamai) new restoration" + }, + { + "input": "The Warrior: 25th Anniversary Screening + Q&A", + "output": "the warrior" + }, + { + "input": "REProduce Viewing Room: Koi Hain (Screening) / Bass Boss (Screening)", + "output": "koi hain bass boss" + }, + { + "input": "Slowly, slowly, the tongue unrolls: an evening of poetry and sound", + "output": "slowly slowly the tongue unrolls an evening of poetry sound" } ]